1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2017 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included 12bf215546Sopenharmony_ci * in all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci */ 22bf215546Sopenharmony_ci 23bf215546Sopenharmony_ci/** 24bf215546Sopenharmony_ci * @file crocus_state.c 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci * ============================= GENXML CODE ============================= 27bf215546Sopenharmony_ci * [This file is compiled once per generation.] 
28bf215546Sopenharmony_ci * ======================================================================= 29bf215546Sopenharmony_ci * 30bf215546Sopenharmony_ci * This is the main state upload code. 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * Gallium uses Constant State Objects, or CSOs, for most state. Large, 33bf215546Sopenharmony_ci * complex, or highly reusable state can be created once, and bound and 34bf215546Sopenharmony_ci * rebound multiple times. This is modeled with the pipe->create_*_state() 35bf215546Sopenharmony_ci * and pipe->bind_*_state() hooks. Highly dynamic or inexpensive state is 36bf215546Sopenharmony_ci * streamed out on the fly, via pipe->set_*_state() hooks. 37bf215546Sopenharmony_ci * 38bf215546Sopenharmony_ci * OpenGL involves frequently mutating context state, which is mirrored in 39bf215546Sopenharmony_ci * core Mesa by highly mutable data structures. However, most applications 40bf215546Sopenharmony_ci * typically draw the same things over and over - from frame to frame, most 41bf215546Sopenharmony_ci * of the same objects are still visible and need to be redrawn. So, rather 42bf215546Sopenharmony_ci * than inventing new state all the time, applications usually mutate to swap 43bf215546Sopenharmony_ci * between known states that we've seen before. 44bf215546Sopenharmony_ci * 45bf215546Sopenharmony_ci * Gallium isolates us from this mutation by tracking API state, and 46bf215546Sopenharmony_ci * distilling it into a set of Constant State Objects, or CSOs. Large, 47bf215546Sopenharmony_ci * complex, or typically reusable state can be created once, then reused 48bf215546Sopenharmony_ci * multiple times. Drivers can create and store their own associated data. 49bf215546Sopenharmony_ci * This create/bind model corresponds to the pipe->create_*_state() and 50bf215546Sopenharmony_ci * pipe->bind_*_state() driver hooks. 51bf215546Sopenharmony_ci * 52bf215546Sopenharmony_ci * Some state is cheap to create, or expected to be highly dynamic. 
Rather 53bf215546Sopenharmony_ci * than creating and caching piles of CSOs for these, Gallium simply streams 54bf215546Sopenharmony_ci * them out, via the pipe->set_*_state() driver hooks. 55bf215546Sopenharmony_ci * 56bf215546Sopenharmony_ci * To reduce draw time overhead, we try to compute as much state at create 57bf215546Sopenharmony_ci * time as possible. Wherever possible, we translate the Gallium pipe state 58bf215546Sopenharmony_ci * to 3DSTATE commands, and store those commands in the CSO. At draw time, 59bf215546Sopenharmony_ci * we can simply memcpy them into a batch buffer. 60bf215546Sopenharmony_ci * 61bf215546Sopenharmony_ci * No hardware matches the abstraction perfectly, so some commands require 62bf215546Sopenharmony_ci * information from multiple CSOs. In this case, we can store two copies 63bf215546Sopenharmony_ci * of the packet (one in each CSO), and simply | together their DWords at 64bf215546Sopenharmony_ci * draw time. Sometimes the second set is trivial (one or two fields), so 65bf215546Sopenharmony_ci * we simply pack it at draw time. 66bf215546Sopenharmony_ci * 67bf215546Sopenharmony_ci * There are two main components in the file below. First, the CSO hooks 68bf215546Sopenharmony_ci * create/bind/track state. The second are the draw-time upload functions, 69bf215546Sopenharmony_ci * crocus_upload_render_state() and crocus_upload_compute_state(), which read 70bf215546Sopenharmony_ci * the context state and emit the commands into the actual batch. 
71bf215546Sopenharmony_ci */ 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci#include <errno.h> 74bf215546Sopenharmony_ci#include <stdio.h> 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci#if HAVE_VALGRIND 77bf215546Sopenharmony_ci#include <memcheck.h> 78bf215546Sopenharmony_ci#include <valgrind.h> 79bf215546Sopenharmony_ci#define VG(x) x 80bf215546Sopenharmony_ci#ifdef DEBUG 81bf215546Sopenharmony_ci#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) 82bf215546Sopenharmony_ci#endif 83bf215546Sopenharmony_ci#else 84bf215546Sopenharmony_ci#define VG(x) 85bf215546Sopenharmony_ci#endif 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci#include "drm-uapi/i915_drm.h" 88bf215546Sopenharmony_ci#include "intel/common/intel_l3_config.h" 89bf215546Sopenharmony_ci#include "intel/common/intel_sample_positions.h" 90bf215546Sopenharmony_ci#include "intel/compiler/brw_compiler.h" 91bf215546Sopenharmony_ci#include "compiler/shader_info.h" 92bf215546Sopenharmony_ci#include "pipe/p_context.h" 93bf215546Sopenharmony_ci#include "pipe/p_defines.h" 94bf215546Sopenharmony_ci#include "pipe/p_screen.h" 95bf215546Sopenharmony_ci#include "pipe/p_state.h" 96bf215546Sopenharmony_ci#include "util/format/u_format.h" 97bf215546Sopenharmony_ci#include "util/half_float.h" 98bf215546Sopenharmony_ci#include "util/u_dual_blend.h" 99bf215546Sopenharmony_ci#include "util/u_framebuffer.h" 100bf215546Sopenharmony_ci#include "util/u_helpers.h" 101bf215546Sopenharmony_ci#include "util/u_inlines.h" 102bf215546Sopenharmony_ci#include "util/u_memory.h" 103bf215546Sopenharmony_ci#include "util/u_prim.h" 104bf215546Sopenharmony_ci#include "util/u_transfer.h" 105bf215546Sopenharmony_ci#include "util/u_upload_mgr.h" 106bf215546Sopenharmony_ci#include "util/u_viewport.h" 107bf215546Sopenharmony_ci#include "crocus_batch.h" 108bf215546Sopenharmony_ci#include "crocus_context.h" 109bf215546Sopenharmony_ci#include "crocus_defines.h" 110bf215546Sopenharmony_ci#include "crocus_pipe.h" 
111bf215546Sopenharmony_ci#include "crocus_resource.h" 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci#include "crocus_genx_macros.h" 114bf215546Sopenharmony_ci#include "intel/common/intel_guardband.h" 115bf215546Sopenharmony_ci#include "main/macros.h" /* UNCLAMPED_* */ 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci/** 118bf215546Sopenharmony_ci * Statically assert that PIPE_* enums match the hardware packets. 119bf215546Sopenharmony_ci * (As long as they match, we don't need to translate them.) 120bf215546Sopenharmony_ci */ 121bf215546Sopenharmony_ciUNUSED static void pipe_asserts() 122bf215546Sopenharmony_ci{ 123bf215546Sopenharmony_ci#define PIPE_ASSERT(x) STATIC_ASSERT((int)x) 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci /* pipe_logicop happens to match the hardware. */ 126bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR); 127bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR); 128bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED); 129bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED); 130bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE); 131bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT); 132bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR); 133bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND); 134bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND); 135bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV); 136bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP); 137bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED); 138bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY); 139bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE); 140bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_OR == 
LOGICOP_OR); 141bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci /* pipe_blend_func happens to match the hardware. */ 144bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE); 145bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR); 146bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA); 147bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA); 148bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR); 149bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE); 150bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR); 151bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA); 152bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR); 153bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA); 154bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO); 155bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR); 156bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA); 157bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA); 158bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR); 159bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR); 160bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA); 161bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR); 162bf215546Sopenharmony_ci 
PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci /* pipe_blend_func happens to match the hardware. */ 165bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD); 166bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT); 167bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT); 168bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN); 169bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci /* pipe_stencil_op happens to match the hardware. */ 172bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP); 173bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO); 174bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE); 175bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT); 176bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT); 177bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR); 178bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR); 179bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci#if GFX_VER >= 6 182bf215546Sopenharmony_ci /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */ 183bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT); 184bf215546Sopenharmony_ci PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT); 185bf215546Sopenharmony_ci#endif 186bf215546Sopenharmony_ci#undef PIPE_ASSERT 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_cistatic unsigned 190bf215546Sopenharmony_citranslate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch) 191bf215546Sopenharmony_ci{ 
192bf215546Sopenharmony_ci static const unsigned map[] = { 193bf215546Sopenharmony_ci [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST, 194bf215546Sopenharmony_ci [PIPE_PRIM_LINES] = _3DPRIM_LINELIST, 195bf215546Sopenharmony_ci [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP, 196bf215546Sopenharmony_ci [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP, 197bf215546Sopenharmony_ci [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST, 198bf215546Sopenharmony_ci [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, 199bf215546Sopenharmony_ci [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN, 200bf215546Sopenharmony_ci [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST, 201bf215546Sopenharmony_ci [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP, 202bf215546Sopenharmony_ci [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON, 203bf215546Sopenharmony_ci#if GFX_VER >= 6 204bf215546Sopenharmony_ci [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, 205bf215546Sopenharmony_ci [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, 206bf215546Sopenharmony_ci [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, 207bf215546Sopenharmony_ci [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, 208bf215546Sopenharmony_ci#endif 209bf215546Sopenharmony_ci#if GFX_VER >= 7 210bf215546Sopenharmony_ci [PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1, 211bf215546Sopenharmony_ci#endif 212bf215546Sopenharmony_ci }; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci return map[prim] + (prim == PIPE_PRIM_PATCHES ? 
verts_per_patch : 0); 215bf215546Sopenharmony_ci} 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_cistatic unsigned 218bf215546Sopenharmony_citranslate_compare_func(enum pipe_compare_func pipe_func) 219bf215546Sopenharmony_ci{ 220bf215546Sopenharmony_ci static const unsigned map[] = { 221bf215546Sopenharmony_ci [PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER, 222bf215546Sopenharmony_ci [PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS, 223bf215546Sopenharmony_ci [PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL, 224bf215546Sopenharmony_ci [PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL, 225bf215546Sopenharmony_ci [PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER, 226bf215546Sopenharmony_ci [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL, 227bf215546Sopenharmony_ci [PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL, 228bf215546Sopenharmony_ci [PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS, 229bf215546Sopenharmony_ci }; 230bf215546Sopenharmony_ci return map[pipe_func]; 231bf215546Sopenharmony_ci} 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_cistatic unsigned 234bf215546Sopenharmony_citranslate_shadow_func(enum pipe_compare_func pipe_func) 235bf215546Sopenharmony_ci{ 236bf215546Sopenharmony_ci /* Gallium specifies the result of shadow comparisons as: 237bf215546Sopenharmony_ci * 238bf215546Sopenharmony_ci * 1 if ref <op> texel, 239bf215546Sopenharmony_ci * 0 otherwise. 240bf215546Sopenharmony_ci * 241bf215546Sopenharmony_ci * The hardware does: 242bf215546Sopenharmony_ci * 243bf215546Sopenharmony_ci * 0 if texel <op> ref, 244bf215546Sopenharmony_ci * 1 otherwise. 245bf215546Sopenharmony_ci * 246bf215546Sopenharmony_ci * So we need to flip the operator and also negate. 
247bf215546Sopenharmony_ci */ 248bf215546Sopenharmony_ci static const unsigned map[] = { 249bf215546Sopenharmony_ci [PIPE_FUNC_NEVER] = PREFILTEROP_ALWAYS, 250bf215546Sopenharmony_ci [PIPE_FUNC_LESS] = PREFILTEROP_LEQUAL, 251bf215546Sopenharmony_ci [PIPE_FUNC_EQUAL] = PREFILTEROP_NOTEQUAL, 252bf215546Sopenharmony_ci [PIPE_FUNC_LEQUAL] = PREFILTEROP_LESS, 253bf215546Sopenharmony_ci [PIPE_FUNC_GREATER] = PREFILTEROP_GEQUAL, 254bf215546Sopenharmony_ci [PIPE_FUNC_NOTEQUAL] = PREFILTEROP_EQUAL, 255bf215546Sopenharmony_ci [PIPE_FUNC_GEQUAL] = PREFILTEROP_GREATER, 256bf215546Sopenharmony_ci [PIPE_FUNC_ALWAYS] = PREFILTEROP_NEVER, 257bf215546Sopenharmony_ci }; 258bf215546Sopenharmony_ci return map[pipe_func]; 259bf215546Sopenharmony_ci} 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_cistatic unsigned 262bf215546Sopenharmony_citranslate_cull_mode(unsigned pipe_face) 263bf215546Sopenharmony_ci{ 264bf215546Sopenharmony_ci static const unsigned map[4] = { 265bf215546Sopenharmony_ci [PIPE_FACE_NONE] = CULLMODE_NONE, 266bf215546Sopenharmony_ci [PIPE_FACE_FRONT] = CULLMODE_FRONT, 267bf215546Sopenharmony_ci [PIPE_FACE_BACK] = CULLMODE_BACK, 268bf215546Sopenharmony_ci [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH, 269bf215546Sopenharmony_ci }; 270bf215546Sopenharmony_ci return map[pipe_face]; 271bf215546Sopenharmony_ci} 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci#if GFX_VER >= 6 274bf215546Sopenharmony_cistatic unsigned 275bf215546Sopenharmony_citranslate_fill_mode(unsigned pipe_polymode) 276bf215546Sopenharmony_ci{ 277bf215546Sopenharmony_ci static const unsigned map[4] = { 278bf215546Sopenharmony_ci [PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID, 279bf215546Sopenharmony_ci [PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, 280bf215546Sopenharmony_ci [PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT, 281bf215546Sopenharmony_ci [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID, 282bf215546Sopenharmony_ci }; 283bf215546Sopenharmony_ci return map[pipe_polymode]; 
284bf215546Sopenharmony_ci} 285bf215546Sopenharmony_ci#endif 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_cistatic unsigned 288bf215546Sopenharmony_citranslate_mip_filter(enum pipe_tex_mipfilter pipe_mip) 289bf215546Sopenharmony_ci{ 290bf215546Sopenharmony_ci static const unsigned map[] = { 291bf215546Sopenharmony_ci [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST, 292bf215546Sopenharmony_ci [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR, 293bf215546Sopenharmony_ci [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE, 294bf215546Sopenharmony_ci }; 295bf215546Sopenharmony_ci return map[pipe_mip]; 296bf215546Sopenharmony_ci} 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_cistatic uint32_t 299bf215546Sopenharmony_citranslate_wrap(unsigned pipe_wrap, bool either_nearest) 300bf215546Sopenharmony_ci{ 301bf215546Sopenharmony_ci static const unsigned map[] = { 302bf215546Sopenharmony_ci [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP, 303bf215546Sopenharmony_ci#if GFX_VER == 8 304bf215546Sopenharmony_ci [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER, 305bf215546Sopenharmony_ci#else 306bf215546Sopenharmony_ci [PIPE_TEX_WRAP_CLAMP] = TCM_CLAMP_BORDER, 307bf215546Sopenharmony_ci#endif 308bf215546Sopenharmony_ci [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP, 309bf215546Sopenharmony_ci [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, 310bf215546Sopenharmony_ci [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR, 311bf215546Sopenharmony_ci [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci /* These are unsupported. 
*/ 314bf215546Sopenharmony_ci [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, 315bf215546Sopenharmony_ci [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, 316bf215546Sopenharmony_ci }; 317bf215546Sopenharmony_ci#if GFX_VER < 8 318bf215546Sopenharmony_ci if (pipe_wrap == PIPE_TEX_WRAP_CLAMP && either_nearest) 319bf215546Sopenharmony_ci return TCM_CLAMP; 320bf215546Sopenharmony_ci#endif 321bf215546Sopenharmony_ci return map[pipe_wrap]; 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci/** 325bf215546Sopenharmony_ci * Equiv if brw_state_batch 326bf215546Sopenharmony_ci */ 327bf215546Sopenharmony_cistatic uint32_t * 328bf215546Sopenharmony_cistream_state(struct crocus_batch *batch, 329bf215546Sopenharmony_ci unsigned size, 330bf215546Sopenharmony_ci unsigned alignment, 331bf215546Sopenharmony_ci uint32_t *out_offset) 332bf215546Sopenharmony_ci{ 333bf215546Sopenharmony_ci uint32_t offset = ALIGN(batch->state.used, alignment); 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci if (offset + size >= STATE_SZ && !batch->no_wrap) { 336bf215546Sopenharmony_ci crocus_batch_flush(batch); 337bf215546Sopenharmony_ci offset = ALIGN(batch->state.used, alignment); 338bf215546Sopenharmony_ci } else if (offset + size >= batch->state.bo->size) { 339bf215546Sopenharmony_ci const unsigned new_size = 340bf215546Sopenharmony_ci MIN2(batch->state.bo->size + batch->state.bo->size / 2, 341bf215546Sopenharmony_ci MAX_STATE_SIZE); 342bf215546Sopenharmony_ci crocus_grow_buffer(batch, true, batch->state.used, new_size); 343bf215546Sopenharmony_ci assert(offset + size < batch->state.bo->size); 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci crocus_record_state_size(batch->state_sizes, offset, size); 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci batch->state.used = offset + size; 349bf215546Sopenharmony_ci *out_offset = offset; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci return (uint32_t *)batch->state.map + (offset >> 2); 
352bf215546Sopenharmony_ci} 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci/** 355bf215546Sopenharmony_ci * stream_state() + memcpy. 356bf215546Sopenharmony_ci */ 357bf215546Sopenharmony_cistatic uint32_t 358bf215546Sopenharmony_ciemit_state(struct crocus_batch *batch, const void *data, unsigned size, 359bf215546Sopenharmony_ci unsigned alignment) 360bf215546Sopenharmony_ci{ 361bf215546Sopenharmony_ci unsigned offset = 0; 362bf215546Sopenharmony_ci uint32_t *map = stream_state(batch, size, alignment, &offset); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci if (map) 365bf215546Sopenharmony_ci memcpy(map, data, size); 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci return offset; 368bf215546Sopenharmony_ci} 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci#if GFX_VER <= 5 371bf215546Sopenharmony_cistatic void 372bf215546Sopenharmony_ciupload_pipelined_state_pointers(struct crocus_batch *batch, 373bf215546Sopenharmony_ci bool gs_active, uint32_t gs_offset, 374bf215546Sopenharmony_ci uint32_t vs_offset, uint32_t sf_offset, 375bf215546Sopenharmony_ci uint32_t clip_offset, uint32_t wm_offset, uint32_t cc_offset) 376bf215546Sopenharmony_ci{ 377bf215546Sopenharmony_ci#if GFX_VER == 5 378bf215546Sopenharmony_ci /* Need to flush before changing clip max threads for errata. 
*/ 379bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_FLUSH), foo); 380bf215546Sopenharmony_ci#endif 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) { 383bf215546Sopenharmony_ci pp.PointertoVSState = ro_bo(batch->state.bo, vs_offset); 384bf215546Sopenharmony_ci pp.GSEnable = gs_active; 385bf215546Sopenharmony_ci if (gs_active) 386bf215546Sopenharmony_ci pp.PointertoGSState = ro_bo(batch->state.bo, gs_offset); 387bf215546Sopenharmony_ci pp.ClipEnable = true; 388bf215546Sopenharmony_ci pp.PointertoCLIPState = ro_bo(batch->state.bo, clip_offset); 389bf215546Sopenharmony_ci pp.PointertoSFState = ro_bo(batch->state.bo, sf_offset); 390bf215546Sopenharmony_ci pp.PointertoWMState = ro_bo(batch->state.bo, wm_offset); 391bf215546Sopenharmony_ci pp.PointertoColorCalcState = ro_bo(batch->state.bo, cc_offset); 392bf215546Sopenharmony_ci } 393bf215546Sopenharmony_ci} 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci#endif 396bf215546Sopenharmony_ci/** 397bf215546Sopenharmony_ci * Did field 'x' change between 'old_cso' and 'new_cso'? 398bf215546Sopenharmony_ci * 399bf215546Sopenharmony_ci * (If so, we may want to set some dirty flags.) 400bf215546Sopenharmony_ci */ 401bf215546Sopenharmony_ci#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x)) 402bf215546Sopenharmony_ci#define cso_changed_memcmp(x) \ 403bf215546Sopenharmony_ci (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_cistatic void 406bf215546Sopenharmony_ciflush_before_state_base_change(struct crocus_batch *batch) 407bf215546Sopenharmony_ci{ 408bf215546Sopenharmony_ci#if GFX_VER >= 6 409bf215546Sopenharmony_ci /* Flush before emitting STATE_BASE_ADDRESS. 410bf215546Sopenharmony_ci * 411bf215546Sopenharmony_ci * This isn't documented anywhere in the PRM. However, it seems to be 412bf215546Sopenharmony_ci * necessary prior to changing the surface state base adress. 
We've 413bf215546Sopenharmony_ci * seen issues in Vulkan where we get GPU hangs when using multi-level 414bf215546Sopenharmony_ci * command buffers which clear depth, reset state base address, and then 415bf215546Sopenharmony_ci * go render stuff. 416bf215546Sopenharmony_ci * 417bf215546Sopenharmony_ci * Normally, in GL, we would trust the kernel to do sufficient stalls 418bf215546Sopenharmony_ci * and flushes prior to executing our batch. However, it doesn't seem 419bf215546Sopenharmony_ci * as if the kernel's flushing is always sufficient and we don't want to 420bf215546Sopenharmony_ci * rely on it. 421bf215546Sopenharmony_ci * 422bf215546Sopenharmony_ci * We make this an end-of-pipe sync instead of a normal flush because we 423bf215546Sopenharmony_ci * do not know the current status of the GPU. On Haswell at least, 424bf215546Sopenharmony_ci * having a fast-clear operation in flight at the same time as a normal 425bf215546Sopenharmony_ci * rendering operation can cause hangs. Since the kernel's flushing is 426bf215546Sopenharmony_ci * insufficient, we need to ensure that any rendering operations from 427bf215546Sopenharmony_ci * other processes are definitely complete before we try to do our own 428bf215546Sopenharmony_ci * rendering. It's a bit of a big hammer but it appears to work. 429bf215546Sopenharmony_ci */ 430bf215546Sopenharmony_ci const unsigned dc_flush = 431bf215546Sopenharmony_ci GFX_VER >= 7 ? 
PIPE_CONTROL_DATA_CACHE_FLUSH : 0; 432bf215546Sopenharmony_ci crocus_emit_end_of_pipe_sync(batch, 433bf215546Sopenharmony_ci "change STATE_BASE_ADDRESS (flushes)", 434bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH | 435bf215546Sopenharmony_ci dc_flush | 436bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH); 437bf215546Sopenharmony_ci#endif 438bf215546Sopenharmony_ci} 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_cistatic void 441bf215546Sopenharmony_ciflush_after_state_base_change(struct crocus_batch *batch) 442bf215546Sopenharmony_ci{ 443bf215546Sopenharmony_ci /* After re-setting the surface state base address, we have to do some 444bf215546Sopenharmony_ci * cache flusing so that the sampler engine will pick up the new 445bf215546Sopenharmony_ci * SURFACE_STATE objects and binding tables. From the Broadwell PRM, 446bf215546Sopenharmony_ci * Shared Function > 3D Sampler > State > State Caching (page 96): 447bf215546Sopenharmony_ci * 448bf215546Sopenharmony_ci * Coherency with system memory in the state cache, like the texture 449bf215546Sopenharmony_ci * cache is handled partially by software. It is expected that the 450bf215546Sopenharmony_ci * command stream or shader will issue Cache Flush operation or 451bf215546Sopenharmony_ci * Cache_Flush sampler message to ensure that the L1 cache remains 452bf215546Sopenharmony_ci * coherent with system memory. 453bf215546Sopenharmony_ci * 454bf215546Sopenharmony_ci * [...] 455bf215546Sopenharmony_ci * 456bf215546Sopenharmony_ci * Whenever the value of the Dynamic_State_Base_Addr, 457bf215546Sopenharmony_ci * Surface_State_Base_Addr are altered, the L1 state cache must be 458bf215546Sopenharmony_ci * invalidated to ensure the new surface or sampler state is fetched 459bf215546Sopenharmony_ci * from system memory. 
460bf215546Sopenharmony_ci * 461bf215546Sopenharmony_ci * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit 462bf215546Sopenharmony_ci * which, according the PIPE_CONTROL instruction documentation in the 463bf215546Sopenharmony_ci * Broadwell PRM: 464bf215546Sopenharmony_ci * 465bf215546Sopenharmony_ci * Setting this bit is independent of any other bit in this packet. 466bf215546Sopenharmony_ci * This bit controls the invalidation of the L1 and L2 state caches 467bf215546Sopenharmony_ci * at the top of the pipe i.e. at the parsing time. 468bf215546Sopenharmony_ci * 469bf215546Sopenharmony_ci * Unfortunately, experimentation seems to indicate that state cache 470bf215546Sopenharmony_ci * invalidation through a PIPE_CONTROL does nothing whatsoever in 471bf215546Sopenharmony_ci * regards to surface state and binding tables. In stead, it seems that 472bf215546Sopenharmony_ci * invalidating the texture cache is what is actually needed. 473bf215546Sopenharmony_ci * 474bf215546Sopenharmony_ci * XXX: As far as we have been able to determine through 475bf215546Sopenharmony_ci * experimentation, shows that flush the texture cache appears to be 476bf215546Sopenharmony_ci * sufficient. The theory here is that all of the sampling/rendering 477bf215546Sopenharmony_ci * units cache the binding table in the texture cache. However, we have 478bf215546Sopenharmony_ci * yet to be able to actually confirm this. 
479bf215546Sopenharmony_ci */ 480bf215546Sopenharmony_ci#if GFX_VER >= 6 481bf215546Sopenharmony_ci crocus_emit_end_of_pipe_sync(batch, 482bf215546Sopenharmony_ci "change STATE_BASE_ADDRESS (invalidates)", 483bf215546Sopenharmony_ci PIPE_CONTROL_INSTRUCTION_INVALIDATE | 484bf215546Sopenharmony_ci PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 485bf215546Sopenharmony_ci PIPE_CONTROL_CONST_CACHE_INVALIDATE | 486bf215546Sopenharmony_ci PIPE_CONTROL_STATE_CACHE_INVALIDATE); 487bf215546Sopenharmony_ci#endif 488bf215546Sopenharmony_ci} 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci#if GFX_VER >= 6 491bf215546Sopenharmony_cistatic void 492bf215546Sopenharmony_cicrocus_store_register_mem32(struct crocus_batch *batch, uint32_t reg, 493bf215546Sopenharmony_ci struct crocus_bo *bo, uint32_t offset, 494bf215546Sopenharmony_ci bool predicated) 495bf215546Sopenharmony_ci{ 496bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) { 497bf215546Sopenharmony_ci srm.RegisterAddress = reg; 498bf215546Sopenharmony_ci srm.MemoryAddress = ggtt_bo(bo, offset); 499bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 500bf215546Sopenharmony_ci srm.PredicateEnable = predicated; 501bf215546Sopenharmony_ci#else 502bf215546Sopenharmony_ci if (predicated) 503bf215546Sopenharmony_ci unreachable("unsupported predication"); 504bf215546Sopenharmony_ci#endif 505bf215546Sopenharmony_ci } 506bf215546Sopenharmony_ci} 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_cistatic void 509bf215546Sopenharmony_cicrocus_store_register_mem64(struct crocus_batch *batch, uint32_t reg, 510bf215546Sopenharmony_ci struct crocus_bo *bo, uint32_t offset, 511bf215546Sopenharmony_ci bool predicated) 512bf215546Sopenharmony_ci{ 513bf215546Sopenharmony_ci crocus_store_register_mem32(batch, reg + 0, bo, offset + 0, predicated); 514bf215546Sopenharmony_ci crocus_store_register_mem32(batch, reg + 4, bo, offset + 4, predicated); 515bf215546Sopenharmony_ci} 516bf215546Sopenharmony_ci#endif 
517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci#if GFX_VER >= 7 519bf215546Sopenharmony_cistatic void 520bf215546Sopenharmony_ci_crocus_emit_lri(struct crocus_batch *batch, uint32_t reg, uint32_t val) 521bf215546Sopenharmony_ci{ 522bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 523bf215546Sopenharmony_ci lri.RegisterOffset = reg; 524bf215546Sopenharmony_ci lri.DataDWord = val; 525bf215546Sopenharmony_ci } 526bf215546Sopenharmony_ci} 527bf215546Sopenharmony_ci#define crocus_emit_lri(b, r, v) _crocus_emit_lri(b, GENX(r##_num), v) 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 530bf215546Sopenharmony_cistatic void 531bf215546Sopenharmony_ci_crocus_emit_lrr(struct crocus_batch *batch, uint32_t dst, uint32_t src) 532bf215546Sopenharmony_ci{ 533bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_REG), lrr) { 534bf215546Sopenharmony_ci lrr.SourceRegisterAddress = src; 535bf215546Sopenharmony_ci lrr.DestinationRegisterAddress = dst; 536bf215546Sopenharmony_ci } 537bf215546Sopenharmony_ci} 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_cistatic void 540bf215546Sopenharmony_cicrocus_load_register_reg32(struct crocus_batch *batch, uint32_t dst, 541bf215546Sopenharmony_ci uint32_t src) 542bf215546Sopenharmony_ci{ 543bf215546Sopenharmony_ci _crocus_emit_lrr(batch, dst, src); 544bf215546Sopenharmony_ci} 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_cistatic void 547bf215546Sopenharmony_cicrocus_load_register_reg64(struct crocus_batch *batch, uint32_t dst, 548bf215546Sopenharmony_ci uint32_t src) 549bf215546Sopenharmony_ci{ 550bf215546Sopenharmony_ci _crocus_emit_lrr(batch, dst, src); 551bf215546Sopenharmony_ci _crocus_emit_lrr(batch, dst + 4, src + 4); 552bf215546Sopenharmony_ci} 553bf215546Sopenharmony_ci#endif 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_cistatic void 556bf215546Sopenharmony_cicrocus_load_register_imm32(struct crocus_batch *batch, uint32_t reg, 
557bf215546Sopenharmony_ci uint32_t val) 558bf215546Sopenharmony_ci{ 559bf215546Sopenharmony_ci _crocus_emit_lri(batch, reg, val); 560bf215546Sopenharmony_ci} 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_cistatic void 563bf215546Sopenharmony_cicrocus_load_register_imm64(struct crocus_batch *batch, uint32_t reg, 564bf215546Sopenharmony_ci uint64_t val) 565bf215546Sopenharmony_ci{ 566bf215546Sopenharmony_ci _crocus_emit_lri(batch, reg + 0, val & 0xffffffff); 567bf215546Sopenharmony_ci _crocus_emit_lri(batch, reg + 4, val >> 32); 568bf215546Sopenharmony_ci} 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci/** 571bf215546Sopenharmony_ci * Emit MI_LOAD_REGISTER_MEM to load a 32-bit MMIO register from a buffer. 572bf215546Sopenharmony_ci */ 573bf215546Sopenharmony_cistatic void 574bf215546Sopenharmony_cicrocus_load_register_mem32(struct crocus_batch *batch, uint32_t reg, 575bf215546Sopenharmony_ci struct crocus_bo *bo, uint32_t offset) 576bf215546Sopenharmony_ci{ 577bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 578bf215546Sopenharmony_ci lrm.RegisterAddress = reg; 579bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, offset); 580bf215546Sopenharmony_ci } 581bf215546Sopenharmony_ci} 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci/** 584bf215546Sopenharmony_ci * Load a 64-bit value from a buffer into a MMIO register via 585bf215546Sopenharmony_ci * two MI_LOAD_REGISTER_MEM commands. 
586bf215546Sopenharmony_ci */ 587bf215546Sopenharmony_cistatic void 588bf215546Sopenharmony_cicrocus_load_register_mem64(struct crocus_batch *batch, uint32_t reg, 589bf215546Sopenharmony_ci struct crocus_bo *bo, uint32_t offset) 590bf215546Sopenharmony_ci{ 591bf215546Sopenharmony_ci crocus_load_register_mem32(batch, reg + 0, bo, offset + 0); 592bf215546Sopenharmony_ci crocus_load_register_mem32(batch, reg + 4, bo, offset + 4); 593bf215546Sopenharmony_ci} 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 596bf215546Sopenharmony_cistatic void 597bf215546Sopenharmony_cicrocus_store_data_imm32(struct crocus_batch *batch, 598bf215546Sopenharmony_ci struct crocus_bo *bo, uint32_t offset, 599bf215546Sopenharmony_ci uint32_t imm) 600bf215546Sopenharmony_ci{ 601bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) { 602bf215546Sopenharmony_ci sdi.Address = rw_bo(bo, offset); 603bf215546Sopenharmony_ci#if GFX_VER >= 6 604bf215546Sopenharmony_ci sdi.ImmediateData = imm; 605bf215546Sopenharmony_ci#endif 606bf215546Sopenharmony_ci } 607bf215546Sopenharmony_ci} 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_cistatic void 610bf215546Sopenharmony_cicrocus_store_data_imm64(struct crocus_batch *batch, 611bf215546Sopenharmony_ci struct crocus_bo *bo, uint32_t offset, 612bf215546Sopenharmony_ci uint64_t imm) 613bf215546Sopenharmony_ci{ 614bf215546Sopenharmony_ci /* Can't use crocus_emit_cmd because MI_STORE_DATA_IMM has a length of 615bf215546Sopenharmony_ci * 2 in genxml but it's actually variable length and we need 5 DWords. 
616bf215546Sopenharmony_ci */ 617bf215546Sopenharmony_ci void *map = crocus_get_command_space(batch, 4 * 5); 618bf215546Sopenharmony_ci _crocus_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) { 619bf215546Sopenharmony_ci sdi.DWordLength = 5 - 2; 620bf215546Sopenharmony_ci sdi.Address = rw_bo(bo, offset); 621bf215546Sopenharmony_ci#if GFX_VER >= 6 622bf215546Sopenharmony_ci sdi.ImmediateData = imm; 623bf215546Sopenharmony_ci#endif 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci} 626bf215546Sopenharmony_ci#endif 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_cistatic void 629bf215546Sopenharmony_cicrocus_copy_mem_mem(struct crocus_batch *batch, 630bf215546Sopenharmony_ci struct crocus_bo *dst_bo, uint32_t dst_offset, 631bf215546Sopenharmony_ci struct crocus_bo *src_bo, uint32_t src_offset, 632bf215546Sopenharmony_ci unsigned bytes) 633bf215546Sopenharmony_ci{ 634bf215546Sopenharmony_ci assert(bytes % 4 == 0); 635bf215546Sopenharmony_ci assert(dst_offset % 4 == 0); 636bf215546Sopenharmony_ci assert(src_offset % 4 == 0); 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci#define CROCUS_TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */ 639bf215546Sopenharmony_ci for (unsigned i = 0; i < bytes; i += 4) { 640bf215546Sopenharmony_ci crocus_load_register_mem32(batch, CROCUS_TEMP_REG, 641bf215546Sopenharmony_ci src_bo, src_offset + i); 642bf215546Sopenharmony_ci crocus_store_register_mem32(batch, CROCUS_TEMP_REG, 643bf215546Sopenharmony_ci dst_bo, dst_offset + i, false); 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci} 646bf215546Sopenharmony_ci#endif 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci/** 649bf215546Sopenharmony_ci * Gallium CSO for rasterizer state. 
650bf215546Sopenharmony_ci */ 651bf215546Sopenharmony_cistruct crocus_rasterizer_state { 652bf215546Sopenharmony_ci struct pipe_rasterizer_state cso; 653bf215546Sopenharmony_ci#if GFX_VER >= 6 654bf215546Sopenharmony_ci uint32_t sf[GENX(3DSTATE_SF_length)]; 655bf215546Sopenharmony_ci uint32_t clip[GENX(3DSTATE_CLIP_length)]; 656bf215546Sopenharmony_ci#endif 657bf215546Sopenharmony_ci#if GFX_VER >= 8 658bf215546Sopenharmony_ci uint32_t raster[GENX(3DSTATE_RASTER_length)]; 659bf215546Sopenharmony_ci#endif 660bf215546Sopenharmony_ci uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)]; 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci uint8_t num_clip_plane_consts; 663bf215546Sopenharmony_ci bool fill_mode_point_or_line; 664bf215546Sopenharmony_ci}; 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci#if GFX_VER <= 5 667bf215546Sopenharmony_ci#define URB_VS 0 668bf215546Sopenharmony_ci#define URB_GS 1 669bf215546Sopenharmony_ci#define URB_CLP 2 670bf215546Sopenharmony_ci#define URB_SF 3 671bf215546Sopenharmony_ci#define URB_CS 4 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_cistatic const struct { 674bf215546Sopenharmony_ci uint32_t min_nr_entries; 675bf215546Sopenharmony_ci uint32_t preferred_nr_entries; 676bf215546Sopenharmony_ci uint32_t min_entry_size; 677bf215546Sopenharmony_ci uint32_t max_entry_size; 678bf215546Sopenharmony_ci} limits[URB_CS+1] = { 679bf215546Sopenharmony_ci { 16, 32, 1, 5 }, /* vs */ 680bf215546Sopenharmony_ci { 4, 8, 1, 5 }, /* gs */ 681bf215546Sopenharmony_ci { 5, 10, 1, 5 }, /* clp */ 682bf215546Sopenharmony_ci { 1, 8, 1, 12 }, /* sf */ 683bf215546Sopenharmony_ci { 1, 4, 1, 32 } /* cs */ 684bf215546Sopenharmony_ci}; 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_cistatic bool check_urb_layout(struct crocus_context *ice) 687bf215546Sopenharmony_ci{ 688bf215546Sopenharmony_ci ice->urb.vs_start = 0; 689bf215546Sopenharmony_ci ice->urb.gs_start = ice->urb.nr_vs_entries * ice->urb.vsize; 690bf215546Sopenharmony_ci 
ice->urb.clip_start = ice->urb.gs_start + ice->urb.nr_gs_entries * ice->urb.vsize; 691bf215546Sopenharmony_ci ice->urb.sf_start = ice->urb.clip_start + ice->urb.nr_clip_entries * ice->urb.vsize; 692bf215546Sopenharmony_ci ice->urb.cs_start = ice->urb.sf_start + ice->urb.nr_sf_entries * ice->urb.sfsize; 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci return ice->urb.cs_start + ice->urb.nr_cs_entries * 695bf215546Sopenharmony_ci ice->urb.csize <= ice->urb.size; 696bf215546Sopenharmony_ci} 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_cistatic bool 700bf215546Sopenharmony_cicrocus_calculate_urb_fence(struct crocus_batch *batch, unsigned csize, 701bf215546Sopenharmony_ci unsigned vsize, unsigned sfsize) 702bf215546Sopenharmony_ci{ 703bf215546Sopenharmony_ci struct crocus_context *ice = batch->ice; 704bf215546Sopenharmony_ci if (csize < limits[URB_CS].min_entry_size) 705bf215546Sopenharmony_ci csize = limits[URB_CS].min_entry_size; 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci if (vsize < limits[URB_VS].min_entry_size) 708bf215546Sopenharmony_ci vsize = limits[URB_VS].min_entry_size; 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci if (sfsize < limits[URB_SF].min_entry_size) 711bf215546Sopenharmony_ci sfsize = limits[URB_SF].min_entry_size; 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci if (ice->urb.vsize < vsize || 714bf215546Sopenharmony_ci ice->urb.sfsize < sfsize || 715bf215546Sopenharmony_ci ice->urb.csize < csize || 716bf215546Sopenharmony_ci (ice->urb.constrained && (ice->urb.vsize > vsize || 717bf215546Sopenharmony_ci ice->urb.sfsize > sfsize || 718bf215546Sopenharmony_ci ice->urb.csize > csize))) { 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci ice->urb.csize = csize; 722bf215546Sopenharmony_ci ice->urb.sfsize = sfsize; 723bf215546Sopenharmony_ci ice->urb.vsize = vsize; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci ice->urb.nr_vs_entries = 
limits[URB_VS].preferred_nr_entries; 726bf215546Sopenharmony_ci ice->urb.nr_gs_entries = limits[URB_GS].preferred_nr_entries; 727bf215546Sopenharmony_ci ice->urb.nr_clip_entries = limits[URB_CLP].preferred_nr_entries; 728bf215546Sopenharmony_ci ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries; 729bf215546Sopenharmony_ci ice->urb.nr_cs_entries = limits[URB_CS].preferred_nr_entries; 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci ice->urb.constrained = 0; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci if (GFX_VER == 5) { 734bf215546Sopenharmony_ci ice->urb.nr_vs_entries = 128; 735bf215546Sopenharmony_ci ice->urb.nr_sf_entries = 48; 736bf215546Sopenharmony_ci if (check_urb_layout(ice)) { 737bf215546Sopenharmony_ci goto done; 738bf215546Sopenharmony_ci } else { 739bf215546Sopenharmony_ci ice->urb.constrained = 1; 740bf215546Sopenharmony_ci ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries; 741bf215546Sopenharmony_ci ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries; 742bf215546Sopenharmony_ci } 743bf215546Sopenharmony_ci } else if (GFX_VERx10 == 45) { 744bf215546Sopenharmony_ci ice->urb.nr_vs_entries = 64; 745bf215546Sopenharmony_ci if (check_urb_layout(ice)) { 746bf215546Sopenharmony_ci goto done; 747bf215546Sopenharmony_ci } else { 748bf215546Sopenharmony_ci ice->urb.constrained = 1; 749bf215546Sopenharmony_ci ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries; 750bf215546Sopenharmony_ci } 751bf215546Sopenharmony_ci } 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci if (!check_urb_layout(ice)) { 754bf215546Sopenharmony_ci ice->urb.nr_vs_entries = limits[URB_VS].min_nr_entries; 755bf215546Sopenharmony_ci ice->urb.nr_gs_entries = limits[URB_GS].min_nr_entries; 756bf215546Sopenharmony_ci ice->urb.nr_clip_entries = limits[URB_CLP].min_nr_entries; 757bf215546Sopenharmony_ci ice->urb.nr_sf_entries = limits[URB_SF].min_nr_entries; 758bf215546Sopenharmony_ci ice->urb.nr_cs_entries = limits[URB_CS].min_nr_entries; 
759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci /* Mark us as operating with constrained nr_entries, so that next 761bf215546Sopenharmony_ci * time we recalculate we'll resize the fences in the hope of 762bf215546Sopenharmony_ci * escaping constrained mode and getting back to normal performance. 763bf215546Sopenharmony_ci */ 764bf215546Sopenharmony_ci ice->urb.constrained = 1; 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci if (!check_urb_layout(ice)) { 767bf215546Sopenharmony_ci /* This is impossible, given the maximal sizes of urb 768bf215546Sopenharmony_ci * entries and the values for minimum nr of entries 769bf215546Sopenharmony_ci * provided above. 770bf215546Sopenharmony_ci */ 771bf215546Sopenharmony_ci fprintf(stderr, "couldn't calculate URB layout!\n"); 772bf215546Sopenharmony_ci exit(1); 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci if (INTEL_DEBUG(DEBUG_URB|DEBUG_PERF)) 776bf215546Sopenharmony_ci fprintf(stderr, "URB CONSTRAINED\n"); 777bf215546Sopenharmony_ci } 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_cidone: 780bf215546Sopenharmony_ci if (INTEL_DEBUG(DEBUG_URB)) 781bf215546Sopenharmony_ci fprintf(stderr, 782bf215546Sopenharmony_ci "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. 
%d\n", 783bf215546Sopenharmony_ci ice->urb.vs_start, 784bf215546Sopenharmony_ci ice->urb.gs_start, 785bf215546Sopenharmony_ci ice->urb.clip_start, 786bf215546Sopenharmony_ci ice->urb.sf_start, 787bf215546Sopenharmony_ci ice->urb.cs_start, 788bf215546Sopenharmony_ci ice->urb.size); 789bf215546Sopenharmony_ci return true; 790bf215546Sopenharmony_ci } 791bf215546Sopenharmony_ci return false; 792bf215546Sopenharmony_ci} 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_cistatic void 795bf215546Sopenharmony_cicrocus_upload_urb_fence(struct crocus_batch *batch) 796bf215546Sopenharmony_ci{ 797bf215546Sopenharmony_ci uint32_t urb_fence[3]; 798bf215546Sopenharmony_ci _crocus_pack_command(batch, GENX(URB_FENCE), urb_fence, urb) { 799bf215546Sopenharmony_ci urb.VSUnitURBReallocationRequest = 1; 800bf215546Sopenharmony_ci urb.GSUnitURBReallocationRequest = 1; 801bf215546Sopenharmony_ci urb.CLIPUnitURBReallocationRequest = 1; 802bf215546Sopenharmony_ci urb.SFUnitURBReallocationRequest = 1; 803bf215546Sopenharmony_ci urb.VFEUnitURBReallocationRequest = 1; 804bf215546Sopenharmony_ci urb.CSUnitURBReallocationRequest = 1; 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci urb.VSFence = batch->ice->urb.gs_start; 807bf215546Sopenharmony_ci urb.GSFence = batch->ice->urb.clip_start; 808bf215546Sopenharmony_ci urb.CLIPFence = batch->ice->urb.sf_start; 809bf215546Sopenharmony_ci urb.SFFence = batch->ice->urb.cs_start; 810bf215546Sopenharmony_ci urb.CSFence = batch->ice->urb.size; 811bf215546Sopenharmony_ci } 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci /* erratum: URB_FENCE must not cross a 64byte cacheline */ 814bf215546Sopenharmony_ci if ((crocus_batch_bytes_used(batch) & 15) > 12) { 815bf215546Sopenharmony_ci int pad = 16 - (crocus_batch_bytes_used(batch) & 15); 816bf215546Sopenharmony_ci do { 817bf215546Sopenharmony_ci *(uint32_t *)batch->command.map_next = 0; 818bf215546Sopenharmony_ci batch->command.map_next += sizeof(uint32_t); 819bf215546Sopenharmony_ci } while 
(--pad); 820bf215546Sopenharmony_ci } 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci crocus_batch_emit(batch, urb_fence, sizeof(uint32_t) * 3); 823bf215546Sopenharmony_ci} 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_cistatic bool 826bf215546Sopenharmony_cicalculate_curbe_offsets(struct crocus_batch *batch) 827bf215546Sopenharmony_ci{ 828bf215546Sopenharmony_ci struct crocus_context *ice = batch->ice; 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci unsigned nr_fp_regs, nr_vp_regs, nr_clip_regs = 0; 831bf215546Sopenharmony_ci unsigned total_regs; 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci nr_fp_regs = 0; 834bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 835bf215546Sopenharmony_ci const struct brw_ubo_range *range = &ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data->ubo_ranges[i]; 836bf215546Sopenharmony_ci if (range->length == 0) 837bf215546Sopenharmony_ci continue; 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_ci /* ubo range tracks at 256-bit, we need 512-bit */ 840bf215546Sopenharmony_ci nr_fp_regs += (range->length + 1) / 2; 841bf215546Sopenharmony_ci } 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci if (ice->state.cso_rast->cso.clip_plane_enable) { 844bf215546Sopenharmony_ci unsigned nr_planes = 6 + util_bitcount(ice->state.cso_rast->cso.clip_plane_enable); 845bf215546Sopenharmony_ci nr_clip_regs = (nr_planes * 4 + 15) / 16; 846bf215546Sopenharmony_ci } 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci nr_vp_regs = 0; 849bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 850bf215546Sopenharmony_ci const struct brw_ubo_range *range = &ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data->ubo_ranges[i]; 851bf215546Sopenharmony_ci if (range->length == 0) 852bf215546Sopenharmony_ci continue; 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci /* ubo range tracks at 256-bit, we need 512-bit */ 855bf215546Sopenharmony_ci nr_vp_regs += (range->length + 1) / 2; 856bf215546Sopenharmony_ci } 
857bf215546Sopenharmony_ci if (nr_vp_regs == 0) { 858bf215546Sopenharmony_ci /* The pre-gen6 VS requires that some push constants get loaded no 859bf215546Sopenharmony_ci * matter what, or the GPU would hang. 860bf215546Sopenharmony_ci */ 861bf215546Sopenharmony_ci nr_vp_regs = 1; 862bf215546Sopenharmony_ci } 863bf215546Sopenharmony_ci total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci /* The CURBE allocation size is limited to 32 512-bit units (128 EU 866bf215546Sopenharmony_ci * registers, or 1024 floats). See CS_URB_STATE in the gen4 or gen5 867bf215546Sopenharmony_ci * (volume 1, part 1) PRMs. 868bf215546Sopenharmony_ci * 869bf215546Sopenharmony_ci * Note that in brw_fs.cpp we're only loading up to 16 EU registers of 870bf215546Sopenharmony_ci * values as push constants before spilling to pull constants, and in 871bf215546Sopenharmony_ci * brw_vec4.cpp we're loading up to 32 registers of push constants. An EU 872bf215546Sopenharmony_ci * register is 1/2 of one of these URB entry units, so that leaves us 16 EU 873bf215546Sopenharmony_ci * regs for clip. 
874bf215546Sopenharmony_ci */ 875bf215546Sopenharmony_ci assert(total_regs <= 32); 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci /* Lazy resize: 878bf215546Sopenharmony_ci */ 879bf215546Sopenharmony_ci if (nr_fp_regs > ice->curbe.wm_size || 880bf215546Sopenharmony_ci nr_vp_regs > ice->curbe.vs_size || 881bf215546Sopenharmony_ci nr_clip_regs != ice->curbe.clip_size || 882bf215546Sopenharmony_ci (total_regs < ice->curbe.total_size / 4 && 883bf215546Sopenharmony_ci ice->curbe.total_size > 16)) { 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci GLuint reg = 0; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci /* Calculate a new layout: 888bf215546Sopenharmony_ci */ 889bf215546Sopenharmony_ci reg = 0; 890bf215546Sopenharmony_ci ice->curbe.wm_start = reg; 891bf215546Sopenharmony_ci ice->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; 892bf215546Sopenharmony_ci ice->curbe.clip_start = reg; 893bf215546Sopenharmony_ci ice->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; 894bf215546Sopenharmony_ci ice->curbe.vs_start = reg; 895bf215546Sopenharmony_ci ice->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; 896bf215546Sopenharmony_ci ice->curbe.total_size = reg; 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci if (0) 899bf215546Sopenharmony_ci fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n", 900bf215546Sopenharmony_ci ice->curbe.wm_start, 901bf215546Sopenharmony_ci ice->curbe.wm_size, 902bf215546Sopenharmony_ci ice->curbe.clip_start, 903bf215546Sopenharmony_ci ice->curbe.clip_size, 904bf215546Sopenharmony_ci ice->curbe.vs_start, 905bf215546Sopenharmony_ci ice->curbe.vs_size ); 906bf215546Sopenharmony_ci return true; 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci return false; 909bf215546Sopenharmony_ci} 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_cistatic void 912bf215546Sopenharmony_ciupload_shader_consts(struct crocus_context *ice, 913bf215546Sopenharmony_ci gl_shader_stage stage, 914bf215546Sopenharmony_ci uint32_t *map, 
915bf215546Sopenharmony_ci unsigned start) 916bf215546Sopenharmony_ci{ 917bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[stage]; 918bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; 919bf215546Sopenharmony_ci uint32_t *cmap; 920bf215546Sopenharmony_ci bool found = false; 921bf215546Sopenharmony_ci unsigned offset = start * 16; 922bf215546Sopenharmony_ci int total = 0; 923bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 924bf215546Sopenharmony_ci const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci if (range->length == 0) 927bf215546Sopenharmony_ci continue; 928bf215546Sopenharmony_ci 929bf215546Sopenharmony_ci unsigned block_index = crocus_bti_to_group_index( 930bf215546Sopenharmony_ci &shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block); 931bf215546Sopenharmony_ci unsigned len = range->length * 8 * sizeof(float); 932bf215546Sopenharmony_ci unsigned start = range->start * 8 * sizeof(float); 933bf215546Sopenharmony_ci struct pipe_transfer *transfer; 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci cmap = pipe_buffer_map_range(&ice->ctx, ice->state.shaders[stage].constbufs[block_index].buffer, 936bf215546Sopenharmony_ci ice->state.shaders[stage].constbufs[block_index].buffer_offset + start, len, 937bf215546Sopenharmony_ci PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED, &transfer); 938bf215546Sopenharmony_ci if (cmap) 939bf215546Sopenharmony_ci memcpy(&map[offset + (total * 8)], cmap, len); 940bf215546Sopenharmony_ci pipe_buffer_unmap(&ice->ctx, transfer); 941bf215546Sopenharmony_ci total += range->length; 942bf215546Sopenharmony_ci found = true; 943bf215546Sopenharmony_ci } 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci if (stage == MESA_SHADER_VERTEX && !found) { 946bf215546Sopenharmony_ci /* The pre-gen6 VS requires that some push constants get loaded no 947bf215546Sopenharmony_ci * matter what, or the GPU would 
hang. 948bf215546Sopenharmony_ci */ 949bf215546Sopenharmony_ci unsigned len = 16; 950bf215546Sopenharmony_ci memset(&map[offset], 0, len); 951bf215546Sopenharmony_ci } 952bf215546Sopenharmony_ci} 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_cistatic const float fixed_plane[6][4] = { 955bf215546Sopenharmony_ci { 0, 0, -1, 1 }, 956bf215546Sopenharmony_ci { 0, 0, 1, 1 }, 957bf215546Sopenharmony_ci { 0, -1, 0, 1 }, 958bf215546Sopenharmony_ci { 0, 1, 0, 1 }, 959bf215546Sopenharmony_ci {-1, 0, 0, 1 }, 960bf215546Sopenharmony_ci { 1, 0, 0, 1 } 961bf215546Sopenharmony_ci}; 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_cistatic void 964bf215546Sopenharmony_cigen4_upload_curbe(struct crocus_batch *batch) 965bf215546Sopenharmony_ci{ 966bf215546Sopenharmony_ci struct crocus_context *ice = batch->ice; 967bf215546Sopenharmony_ci const unsigned sz = ice->curbe.total_size; 968bf215546Sopenharmony_ci const unsigned buf_sz = sz * 16 * sizeof(float); 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci if (sz == 0) 971bf215546Sopenharmony_ci goto emit; 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci uint32_t *map; 974bf215546Sopenharmony_ci u_upload_alloc(ice->ctx.const_uploader, 0, buf_sz, 64, 975bf215546Sopenharmony_ci &ice->curbe.curbe_offset, (struct pipe_resource **)&ice->curbe.curbe_res, (void **) &map); 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci /* fragment shader constants */ 978bf215546Sopenharmony_ci if (ice->curbe.wm_size) { 979bf215546Sopenharmony_ci upload_shader_consts(ice, MESA_SHADER_FRAGMENT, map, ice->curbe.wm_start); 980bf215546Sopenharmony_ci } 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci /* clipper constants */ 983bf215546Sopenharmony_ci if (ice->curbe.clip_size) { 984bf215546Sopenharmony_ci unsigned offset = ice->curbe.clip_start * 16; 985bf215546Sopenharmony_ci float *fmap = (float *)map; 986bf215546Sopenharmony_ci unsigned i; 987bf215546Sopenharmony_ci /* If any planes are going this way, send them all this way: 
988bf215546Sopenharmony_ci */ 989bf215546Sopenharmony_ci for (i = 0; i < 6; i++) { 990bf215546Sopenharmony_ci fmap[offset + i * 4 + 0] = fixed_plane[i][0]; 991bf215546Sopenharmony_ci fmap[offset + i * 4 + 1] = fixed_plane[i][1]; 992bf215546Sopenharmony_ci fmap[offset + i * 4 + 2] = fixed_plane[i][2]; 993bf215546Sopenharmony_ci fmap[offset + i * 4 + 3] = fixed_plane[i][3]; 994bf215546Sopenharmony_ci } 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci unsigned mask = ice->state.cso_rast->cso.clip_plane_enable; 997bf215546Sopenharmony_ci struct pipe_clip_state *cp = &ice->state.clip_planes; 998bf215546Sopenharmony_ci while (mask) { 999bf215546Sopenharmony_ci const int j = u_bit_scan(&mask); 1000bf215546Sopenharmony_ci fmap[offset + i * 4 + 0] = cp->ucp[j][0]; 1001bf215546Sopenharmony_ci fmap[offset + i * 4 + 1] = cp->ucp[j][1]; 1002bf215546Sopenharmony_ci fmap[offset + i * 4 + 2] = cp->ucp[j][2]; 1003bf215546Sopenharmony_ci fmap[offset + i * 4 + 3] = cp->ucp[j][3]; 1004bf215546Sopenharmony_ci i++; 1005bf215546Sopenharmony_ci } 1006bf215546Sopenharmony_ci } 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci /* vertex shader constants */ 1009bf215546Sopenharmony_ci if (ice->curbe.vs_size) { 1010bf215546Sopenharmony_ci upload_shader_consts(ice, MESA_SHADER_VERTEX, map, ice->curbe.vs_start); 1011bf215546Sopenharmony_ci } 1012bf215546Sopenharmony_ci if (0) { 1013bf215546Sopenharmony_ci for (int i = 0; i < sz*16; i+=4) { 1014bf215546Sopenharmony_ci float *f = (float *)map; 1015bf215546Sopenharmony_ci fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4, 1016bf215546Sopenharmony_ci f[i+0], f[i+1], f[i+2], f[i+3]); 1017bf215546Sopenharmony_ci } 1018bf215546Sopenharmony_ci } 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_ciemit: 1021bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(CONSTANT_BUFFER), cb) { 1022bf215546Sopenharmony_ci if (ice->curbe.curbe_res) { 1023bf215546Sopenharmony_ci cb.BufferLength = ice->curbe.total_size - 1; 
1024bf215546Sopenharmony_ci cb.Valid = 1; 1025bf215546Sopenharmony_ci cb.BufferStartingAddress = ro_bo(ice->curbe.curbe_res->bo, ice->curbe.curbe_offset); 1026bf215546Sopenharmony_ci } 1027bf215546Sopenharmony_ci } 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci#if GFX_VER == 4 && GFX_VERx10 != 45 1030bf215546Sopenharmony_ci /* Work around a Broadwater/Crestline depth interpolator bug. The 1031bf215546Sopenharmony_ci * following sequence will cause GPU hangs: 1032bf215546Sopenharmony_ci * 1033bf215546Sopenharmony_ci * 1. Change state so that all depth related fields in CC_STATE are 1034bf215546Sopenharmony_ci * disabled, and in WM_STATE, only "PS Use Source Depth" is enabled. 1035bf215546Sopenharmony_ci * 2. Emit a CONSTANT_BUFFER packet. 1036bf215546Sopenharmony_ci * 3. Draw via 3DPRIMITIVE. 1037bf215546Sopenharmony_ci * 1038bf215546Sopenharmony_ci * The recommended workaround is to emit a non-pipelined state change after 1039bf215546Sopenharmony_ci * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline. 1040bf215546Sopenharmony_ci * 1041bf215546Sopenharmony_ci * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small), 1042bf215546Sopenharmony_ci * and always emit it when "PS Use Source Depth" is set. We could be more 1043bf215546Sopenharmony_ci * precise, but the additional complexity is probably not worth it. 
1044bf215546Sopenharmony_ci * 1045bf215546Sopenharmony_ci */ 1046bf215546Sopenharmony_ci const struct shader_info *fs_info = 1047bf215546Sopenharmony_ci crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT); 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD)) { 1050bf215546Sopenharmony_ci ice->state.global_depth_offset_clamp = 0; 1051bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp); 1052bf215546Sopenharmony_ci } 1053bf215546Sopenharmony_ci#endif 1054bf215546Sopenharmony_ci} 1055bf215546Sopenharmony_ci#endif 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_ci#if GFX_VER >= 7 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000 1060bf215546Sopenharmony_ci#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000 1061bf215546Sopenharmony_ci#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_cistatic void 1064bf215546Sopenharmony_cisetup_l3_config(struct crocus_batch *batch, const struct intel_l3_config *cfg) 1065bf215546Sopenharmony_ci{ 1066bf215546Sopenharmony_ci#if GFX_VER == 7 1067bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &batch->screen->devinfo; 1068bf215546Sopenharmony_ci const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL]; 1069bf215546Sopenharmony_ci const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] || 1070bf215546Sopenharmony_ci cfg->n[INTEL_L3P_ALL]; 1071bf215546Sopenharmony_ci const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] || 1072bf215546Sopenharmony_ci cfg->n[INTEL_L3P_ALL]; 1073bf215546Sopenharmony_ci const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] || 1074bf215546Sopenharmony_ci cfg->n[INTEL_L3P_ALL]; 1075bf215546Sopenharmony_ci const bool has_slm = cfg->n[INTEL_L3P_SLM]; 1076bf215546Sopenharmony_ci#endif 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci /* 
According to the hardware docs, the L3 partitioning can only be changed 1079bf215546Sopenharmony_ci * while the pipeline is completely drained and the caches are flushed, 1080bf215546Sopenharmony_ci * which involves a first PIPE_CONTROL flush which stalls the pipeline... 1081bf215546Sopenharmony_ci */ 1082bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, "l3_config", 1083bf215546Sopenharmony_ci PIPE_CONTROL_DATA_CACHE_FLUSH | 1084bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci /* ...followed by a second pipelined PIPE_CONTROL that initiates 1087bf215546Sopenharmony_ci * invalidation of the relevant caches. Note that because RO invalidation 1088bf215546Sopenharmony_ci * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL 1089bf215546Sopenharmony_ci * command is processed by the CS) we cannot combine it with the previous 1090bf215546Sopenharmony_ci * stalling flush as the hardware documentation suggests, because that 1091bf215546Sopenharmony_ci * would cause the CS to stall on previous rendering *after* RO 1092bf215546Sopenharmony_ci * invalidation and wouldn't prevent the RO caches from being polluted by 1093bf215546Sopenharmony_ci * concurrent rendering before the stall completes. This intentionally 1094bf215546Sopenharmony_ci * doesn't implement the SKL+ hardware workaround suggesting to enable CS 1095bf215546Sopenharmony_ci * stall on PIPE_CONTROLs with the texture cache invalidation bit set for 1096bf215546Sopenharmony_ci * GPGPU workloads because the previous and subsequent PIPE_CONTROLs 1097bf215546Sopenharmony_ci * already guarantee that there is no concurrent GPGPU kernel execution 1098bf215546Sopenharmony_ci * (see SKL HSD 2132585). 
1099bf215546Sopenharmony_ci */ 1100bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, "l3 config", 1101bf215546Sopenharmony_ci PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 1102bf215546Sopenharmony_ci PIPE_CONTROL_CONST_CACHE_INVALIDATE | 1103bf215546Sopenharmony_ci PIPE_CONTROL_INSTRUCTION_INVALIDATE | 1104bf215546Sopenharmony_ci PIPE_CONTROL_STATE_CACHE_INVALIDATE); 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci /* Now send a third stalling flush to make sure that invalidation is 1107bf215546Sopenharmony_ci * complete when the L3 configuration registers are modified. 1108bf215546Sopenharmony_ci */ 1109bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, "l3 config", 1110bf215546Sopenharmony_ci PIPE_CONTROL_DATA_CACHE_FLUSH | 1111bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 1112bf215546Sopenharmony_ci 1113bf215546Sopenharmony_ci#if GFX_VER == 8 1114bf215546Sopenharmony_ci assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]); 1115bf215546Sopenharmony_ci crocus_emit_reg(batch, GENX(L3CNTLREG), reg) { 1116bf215546Sopenharmony_ci reg.SLMEnable = cfg->n[INTEL_L3P_SLM] > 0; 1117bf215546Sopenharmony_ci reg.URBAllocation = cfg->n[INTEL_L3P_URB]; 1118bf215546Sopenharmony_ci reg.ROAllocation = cfg->n[INTEL_L3P_RO]; 1119bf215546Sopenharmony_ci reg.DCAllocation = cfg->n[INTEL_L3P_DC]; 1120bf215546Sopenharmony_ci reg.AllAllocation = cfg->n[INTEL_L3P_ALL]; 1121bf215546Sopenharmony_ci } 1122bf215546Sopenharmony_ci#else 1123bf215546Sopenharmony_ci assert(!cfg->n[INTEL_L3P_ALL]); 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_ci /* When enabled SLM only uses a portion of the L3 on half of the banks, 1126bf215546Sopenharmony_ci * the matching space on the remaining banks has to be allocated to a 1127bf215546Sopenharmony_ci * client (URB for all validated configurations) set to the 1128bf215546Sopenharmony_ci * lower-bandwidth 2-bank address hashing mode. 
1129bf215546Sopenharmony_ci */ 1130bf215546Sopenharmony_ci const bool urb_low_bw = has_slm && devinfo->platform != INTEL_PLATFORM_BYT; 1131bf215546Sopenharmony_ci assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]); 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci /* Minimum number of ways that can be allocated to the URB. */ 1134bf215546Sopenharmony_ci const unsigned n0_urb = (devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0); 1135bf215546Sopenharmony_ci assert(cfg->n[INTEL_L3P_URB] >= n0_urb); 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci uint32_t l3sqcr1, l3cr2, l3cr3; 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci crocus_pack_state(GENX(L3SQCREG1), &l3sqcr1, reg) { 1140bf215546Sopenharmony_ci reg.ConvertDC_UC = !has_dc; 1141bf215546Sopenharmony_ci reg.ConvertIS_UC = !has_is; 1142bf215546Sopenharmony_ci reg.ConvertC_UC = !has_c; 1143bf215546Sopenharmony_ci reg.ConvertT_UC = !has_t; 1144bf215546Sopenharmony_ci#if GFX_VERx10 == 75 1145bf215546Sopenharmony_ci reg.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT; 1146bf215546Sopenharmony_ci#else 1147bf215546Sopenharmony_ci reg.L3SQGeneralPriorityCreditInitialization = 1148bf215546Sopenharmony_ci devinfo->platform == INTEL_PLATFORM_BYT ? 
BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT; 1149bf215546Sopenharmony_ci#endif 1150bf215546Sopenharmony_ci reg.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT; 1151bf215546Sopenharmony_ci }; 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci crocus_pack_state(GENX(L3CNTLREG2), &l3cr2, reg) { 1154bf215546Sopenharmony_ci reg.SLMEnable = has_slm; 1155bf215546Sopenharmony_ci reg.URBLowBandwidth = urb_low_bw; 1156bf215546Sopenharmony_ci reg.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb; 1157bf215546Sopenharmony_ci#if !(GFX_VERx10 == 75) 1158bf215546Sopenharmony_ci reg.ALLAllocation = cfg->n[INTEL_L3P_ALL]; 1159bf215546Sopenharmony_ci#endif 1160bf215546Sopenharmony_ci reg.ROAllocation = cfg->n[INTEL_L3P_RO]; 1161bf215546Sopenharmony_ci reg.DCAllocation = cfg->n[INTEL_L3P_DC]; 1162bf215546Sopenharmony_ci }; 1163bf215546Sopenharmony_ci 1164bf215546Sopenharmony_ci crocus_pack_state(GENX(L3CNTLREG3), &l3cr3, reg) { 1165bf215546Sopenharmony_ci reg.ISAllocation = cfg->n[INTEL_L3P_IS]; 1166bf215546Sopenharmony_ci reg.ISLowBandwidth = 0; 1167bf215546Sopenharmony_ci reg.CAllocation = cfg->n[INTEL_L3P_C]; 1168bf215546Sopenharmony_ci reg.CLowBandwidth = 0; 1169bf215546Sopenharmony_ci reg.TAllocation = cfg->n[INTEL_L3P_T]; 1170bf215546Sopenharmony_ci reg.TLowBandwidth = 0; 1171bf215546Sopenharmony_ci }; 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci /* Set up the L3 partitioning. 
*/ 1174bf215546Sopenharmony_ci crocus_emit_lri(batch, L3SQCREG1, l3sqcr1); 1175bf215546Sopenharmony_ci crocus_emit_lri(batch, L3CNTLREG2, l3cr2); 1176bf215546Sopenharmony_ci crocus_emit_lri(batch, L3CNTLREG3, l3cr3); 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci#if GFX_VERSIONx10 == 75 1179bf215546Sopenharmony_ci /* TODO: Fail screen creation if command parser version < 4 */ 1180bf215546Sopenharmony_ci uint32_t scratch1, chicken3; 1181bf215546Sopenharmony_ci crocus_pack_state(GENX(SCRATCH1), &scratch1, reg) { 1182bf215546Sopenharmony_ci reg.L3AtomicDisable = !has_dc; 1183bf215546Sopenharmony_ci } 1184bf215546Sopenharmony_ci crocus_pack_state(GENX(CHICKEN3), &chicken3, reg) { 1185bf215546Sopenharmony_ci reg.L3AtomicDisableMask = true; 1186bf215546Sopenharmony_ci reg.L3AtomicDisable = !has_dc; 1187bf215546Sopenharmony_ci } 1188bf215546Sopenharmony_ci crocus_emit_lri(batch, SCRATCH1, scratch1); 1189bf215546Sopenharmony_ci crocus_emit_lri(batch, CHICKEN3, chicken3); 1190bf215546Sopenharmony_ci#endif 1191bf215546Sopenharmony_ci#endif 1192bf215546Sopenharmony_ci} 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_cistatic void 1195bf215546Sopenharmony_ciemit_l3_state(struct crocus_batch *batch, bool compute) 1196bf215546Sopenharmony_ci{ 1197bf215546Sopenharmony_ci const struct intel_l3_config *const cfg = 1198bf215546Sopenharmony_ci compute ? batch->screen->l3_config_cs : batch->screen->l3_config_3d; 1199bf215546Sopenharmony_ci 1200bf215546Sopenharmony_ci setup_l3_config(batch, cfg); 1201bf215546Sopenharmony_ci if (INTEL_DEBUG(DEBUG_L3)) { 1202bf215546Sopenharmony_ci intel_dump_l3_config(cfg, stderr); 1203bf215546Sopenharmony_ci } 1204bf215546Sopenharmony_ci} 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci/** 1207bf215546Sopenharmony_ci * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set. 
1208bf215546Sopenharmony_ci */ 1209bf215546Sopenharmony_cistatic void 1210bf215546Sopenharmony_cigen7_emit_cs_stall_flush(struct crocus_batch *batch) 1211bf215546Sopenharmony_ci{ 1212bf215546Sopenharmony_ci crocus_emit_pipe_control_write(batch, 1213bf215546Sopenharmony_ci "workaround", 1214bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL 1215bf215546Sopenharmony_ci | PIPE_CONTROL_WRITE_IMMEDIATE, 1216bf215546Sopenharmony_ci batch->ice->workaround_bo, 1217bf215546Sopenharmony_ci batch->ice->workaround_offset, 0); 1218bf215546Sopenharmony_ci} 1219bf215546Sopenharmony_ci#endif 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_cistatic void 1222bf215546Sopenharmony_ciemit_pipeline_select(struct crocus_batch *batch, uint32_t pipeline) 1223bf215546Sopenharmony_ci{ 1224bf215546Sopenharmony_ci#if GFX_VER == 8 1225bf215546Sopenharmony_ci /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: 1226bf215546Sopenharmony_ci * 1227bf215546Sopenharmony_ci * Software must clear the COLOR_CALC_STATE Valid field in 1228bf215546Sopenharmony_ci * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT 1229bf215546Sopenharmony_ci * with Pipeline Select set to GPGPU. 1230bf215546Sopenharmony_ci * 1231bf215546Sopenharmony_ci * The internal hardware docs recommend the same workaround for Gfx9 1232bf215546Sopenharmony_ci * hardware too. 
1233bf215546Sopenharmony_ci */ 1234bf215546Sopenharmony_ci if (pipeline == GPGPU) 1235bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), t); 1236bf215546Sopenharmony_ci#endif 1237bf215546Sopenharmony_ci 1238bf215546Sopenharmony_ci#if GFX_VER >= 6 1239bf215546Sopenharmony_ci /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] 1240bf215546Sopenharmony_ci * PIPELINE_SELECT [DevBWR+]": 1241bf215546Sopenharmony_ci * 1242bf215546Sopenharmony_ci * "Project: DEVSNB+ 1243bf215546Sopenharmony_ci * 1244bf215546Sopenharmony_ci * Software must ensure all the write caches are flushed through a 1245bf215546Sopenharmony_ci * stalling PIPE_CONTROL command followed by another PIPE_CONTROL 1246bf215546Sopenharmony_ci * command to invalidate read only caches prior to programming 1247bf215546Sopenharmony_ci * MI_PIPELINE_SELECT command to change the Pipeline Select Mode." 1248bf215546Sopenharmony_ci */ 1249bf215546Sopenharmony_ci const unsigned dc_flush = 1250bf215546Sopenharmony_ci GFX_VER >= 7 ? 
PIPE_CONTROL_DATA_CACHE_FLUSH : 0; 1251bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, 1252bf215546Sopenharmony_ci "workaround: PIPELINE_SELECT flushes (1/2)", 1253bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH | 1254bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1255bf215546Sopenharmony_ci dc_flush | 1256bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, 1259bf215546Sopenharmony_ci "workaround: PIPELINE_SELECT flushes (2/2)", 1260bf215546Sopenharmony_ci PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 1261bf215546Sopenharmony_ci PIPE_CONTROL_CONST_CACHE_INVALIDATE | 1262bf215546Sopenharmony_ci PIPE_CONTROL_STATE_CACHE_INVALIDATE | 1263bf215546Sopenharmony_ci PIPE_CONTROL_INSTRUCTION_INVALIDATE); 1264bf215546Sopenharmony_ci#else 1265bf215546Sopenharmony_ci /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] 1266bf215546Sopenharmony_ci * PIPELINE_SELECT [DevBWR+]": 1267bf215546Sopenharmony_ci * 1268bf215546Sopenharmony_ci * Project: PRE-DEVSNB 1269bf215546Sopenharmony_ci * 1270bf215546Sopenharmony_ci * Software must ensure the current pipeline is flushed via an 1271bf215546Sopenharmony_ci * MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT. 
1272bf215546Sopenharmony_ci */ 1273bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_FLUSH), foo); 1274bf215546Sopenharmony_ci#endif 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) { 1277bf215546Sopenharmony_ci sel.PipelineSelection = pipeline; 1278bf215546Sopenharmony_ci } 1279bf215546Sopenharmony_ci 1280bf215546Sopenharmony_ci#if GFX_VER == 7 && !(GFX_VERx10 == 75) 1281bf215546Sopenharmony_ci if (pipeline == _3D) { 1282bf215546Sopenharmony_ci gen7_emit_cs_stall_flush(batch); 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) { 1285bf215546Sopenharmony_ci prim.PrimitiveTopologyType = _3DPRIM_POINTLIST; 1286bf215546Sopenharmony_ci }; 1287bf215546Sopenharmony_ci } 1288bf215546Sopenharmony_ci#endif 1289bf215546Sopenharmony_ci} 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_ci/** 1292bf215546Sopenharmony_ci * The following diagram shows how we partition the URB: 1293bf215546Sopenharmony_ci * 1294bf215546Sopenharmony_ci * 16kB or 32kB Rest of the URB space 1295bf215546Sopenharmony_ci * __________-__________ _________________-_________________ 1296bf215546Sopenharmony_ci * / \ / \ 1297bf215546Sopenharmony_ci * +-------------------------------------------------------------+ 1298bf215546Sopenharmony_ci * | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB | 1299bf215546Sopenharmony_ci * | Constants | Entries | 1300bf215546Sopenharmony_ci * +-------------------------------------------------------------+ 1301bf215546Sopenharmony_ci * 1302bf215546Sopenharmony_ci * Notably, push constants must be stored at the beginning of the URB 1303bf215546Sopenharmony_ci * space, while entries can be stored anywhere. Ivybridge and Haswell 1304bf215546Sopenharmony_ci * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3 1305bf215546Sopenharmony_ci * doubles this (32kB). 
1306bf215546Sopenharmony_ci * 1307bf215546Sopenharmony_ci * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and 1308bf215546Sopenharmony_ci * sized) in increments of 1kB. Haswell GT3 requires them to be located and 1309bf215546Sopenharmony_ci * sized in increments of 2kB. 1310bf215546Sopenharmony_ci * 1311bf215546Sopenharmony_ci * Currently we split the constant buffer space evenly among whatever stages 1312bf215546Sopenharmony_ci * are active. This is probably not ideal, but simple. 1313bf215546Sopenharmony_ci * 1314bf215546Sopenharmony_ci * Ivybridge GT1 and Haswell GT1 have 128kB of URB space. 1315bf215546Sopenharmony_ci * Ivybridge GT2 and Haswell GT2 have 256kB of URB space. 1316bf215546Sopenharmony_ci * Haswell GT3 has 512kB of URB space. 1317bf215546Sopenharmony_ci * 1318bf215546Sopenharmony_ci * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations", 1319bf215546Sopenharmony_ci * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS. 1320bf215546Sopenharmony_ci */ 1321bf215546Sopenharmony_ci#if GFX_VER >= 7 1322bf215546Sopenharmony_cistatic void 1323bf215546Sopenharmony_cicrocus_alloc_push_constants(struct crocus_batch *batch) 1324bf215546Sopenharmony_ci{ 1325bf215546Sopenharmony_ci const unsigned push_constant_kb = 1326bf215546Sopenharmony_ci batch->screen->devinfo.max_constant_urb_size_kb; 1327bf215546Sopenharmony_ci unsigned size_per_stage = push_constant_kb / 5; 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_ci /* For now, we set a static partitioning of the push constant area, 1330bf215546Sopenharmony_ci * assuming that all stages could be in use. 1331bf215546Sopenharmony_ci * 1332bf215546Sopenharmony_ci * TODO: Try lazily allocating the HS/DS/GS sections as needed, and 1333bf215546Sopenharmony_ci * see if that improves performance by offering more space to 1334bf215546Sopenharmony_ci * the VS/FS when those aren't in use. Also, try dynamically 1335bf215546Sopenharmony_ci * enabling/disabling it like i965 does. 
This would be more 1336bf215546Sopenharmony_ci * stalls and may not actually help; we don't know yet. 1337bf215546Sopenharmony_ci */ 1338bf215546Sopenharmony_ci for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) { 1339bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) { 1340bf215546Sopenharmony_ci alloc._3DCommandSubOpcode = 18 + i; 1341bf215546Sopenharmony_ci alloc.ConstantBufferOffset = size_per_stage * i; 1342bf215546Sopenharmony_ci alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? (push_constant_kb - 4 * size_per_stage) : size_per_stage; 1343bf215546Sopenharmony_ci } 1344bf215546Sopenharmony_ci } 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_ci /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS): 1347bf215546Sopenharmony_ci * 1348bf215546Sopenharmony_ci * A PIPE_CONTROL command with the CS Stall bit set must be programmed 1349bf215546Sopenharmony_ci * in the ring after this instruction. 1350bf215546Sopenharmony_ci * 1351bf215546Sopenharmony_ci * No such restriction exists for Haswell or Baytrail. 1352bf215546Sopenharmony_ci */ 1353bf215546Sopenharmony_ci if (batch->screen->devinfo.platform == INTEL_PLATFORM_IVB) 1354bf215546Sopenharmony_ci gen7_emit_cs_stall_flush(batch); 1355bf215546Sopenharmony_ci} 1356bf215546Sopenharmony_ci#endif 1357bf215546Sopenharmony_ci 1358bf215546Sopenharmony_ci/** 1359bf215546Sopenharmony_ci * Upload the initial GPU state for a render context. 1360bf215546Sopenharmony_ci * 1361bf215546Sopenharmony_ci * This sets some invariant state that needs to be programmed a particular 1362bf215546Sopenharmony_ci * way, but we never actually change. 
1363bf215546Sopenharmony_ci */ 1364bf215546Sopenharmony_cistatic void 1365bf215546Sopenharmony_cicrocus_init_render_context(struct crocus_batch *batch) 1366bf215546Sopenharmony_ci{ 1367bf215546Sopenharmony_ci UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo; 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci emit_pipeline_select(batch, _3D); 1370bf215546Sopenharmony_ci 1371bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(STATE_SIP), foo); 1372bf215546Sopenharmony_ci 1373bf215546Sopenharmony_ci#if GFX_VER >= 7 1374bf215546Sopenharmony_ci emit_l3_state(batch, false); 1375bf215546Sopenharmony_ci#endif 1376bf215546Sopenharmony_ci#if (GFX_VERx10 == 70 || GFX_VERx10 == 80) 1377bf215546Sopenharmony_ci crocus_emit_reg(batch, GENX(INSTPM), reg) { 1378bf215546Sopenharmony_ci reg.CONSTANT_BUFFERAddressOffsetDisable = true; 1379bf215546Sopenharmony_ci reg.CONSTANT_BUFFERAddressOffsetDisableMask = true; 1380bf215546Sopenharmony_ci } 1381bf215546Sopenharmony_ci#endif 1382bf215546Sopenharmony_ci#if GFX_VER >= 5 || GFX_VERx10 == 45 1383bf215546Sopenharmony_ci /* Use the legacy AA line coverage computation. */ 1384bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo); 1385bf215546Sopenharmony_ci#endif 1386bf215546Sopenharmony_ci 1387bf215546Sopenharmony_ci /* No polygon stippling offsets are necessary. */ 1388bf215546Sopenharmony_ci /* TODO: may need to set an offset for origin-UL framebuffers */ 1389bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo); 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci#if GFX_VER >= 7 1392bf215546Sopenharmony_ci crocus_alloc_push_constants(batch); 1393bf215546Sopenharmony_ci#endif 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci#if GFX_VER == 8 1396bf215546Sopenharmony_ci /* Set the initial MSAA sample positions. 
*/ 1397bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) { 1398bf215546Sopenharmony_ci INTEL_SAMPLE_POS_1X(pat._1xSample); 1399bf215546Sopenharmony_ci INTEL_SAMPLE_POS_2X(pat._2xSample); 1400bf215546Sopenharmony_ci INTEL_SAMPLE_POS_4X(pat._4xSample); 1401bf215546Sopenharmony_ci INTEL_SAMPLE_POS_8X(pat._8xSample); 1402bf215546Sopenharmony_ci } 1403bf215546Sopenharmony_ci 1404bf215546Sopenharmony_ci /* Disable chromakeying (it's for media) */ 1405bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo); 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci /* We want regular rendering, not special HiZ operations. */ 1408bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo); 1409bf215546Sopenharmony_ci#endif 1410bf215546Sopenharmony_ci} 1411bf215546Sopenharmony_ci 1412bf215546Sopenharmony_ci#if GFX_VER >= 7 1413bf215546Sopenharmony_cistatic void 1414bf215546Sopenharmony_cicrocus_init_compute_context(struct crocus_batch *batch) 1415bf215546Sopenharmony_ci{ 1416bf215546Sopenharmony_ci UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo; 1417bf215546Sopenharmony_ci 1418bf215546Sopenharmony_ci emit_pipeline_select(batch, GPGPU); 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci#if GFX_VER >= 7 1421bf215546Sopenharmony_ci emit_l3_state(batch, true); 1422bf215546Sopenharmony_ci#endif 1423bf215546Sopenharmony_ci} 1424bf215546Sopenharmony_ci#endif 1425bf215546Sopenharmony_ci 1426bf215546Sopenharmony_ci/** 1427bf215546Sopenharmony_ci * Generation-specific context state (ice->state.genx->...). 1428bf215546Sopenharmony_ci * 1429bf215546Sopenharmony_ci * Most state can go in crocus_context directly, but these encode hardware 1430bf215546Sopenharmony_ci * packets which vary by generation. 
1431bf215546Sopenharmony_ci */ 1432bf215546Sopenharmony_cistruct crocus_genx_state { 1433bf215546Sopenharmony_ci struct { 1434bf215546Sopenharmony_ci#if GFX_VER >= 7 1435bf215546Sopenharmony_ci struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES]; 1436bf215546Sopenharmony_ci#endif 1437bf215546Sopenharmony_ci } shaders[MESA_SHADER_STAGES]; 1438bf215546Sopenharmony_ci 1439bf215546Sopenharmony_ci#if GFX_VER == 8 1440bf215546Sopenharmony_ci bool pma_fix_enabled; 1441bf215546Sopenharmony_ci#endif 1442bf215546Sopenharmony_ci}; 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_ci/** 1445bf215546Sopenharmony_ci * The pipe->set_blend_color() driver hook. 1446bf215546Sopenharmony_ci * 1447bf215546Sopenharmony_ci * This corresponds to our COLOR_CALC_STATE. 1448bf215546Sopenharmony_ci */ 1449bf215546Sopenharmony_cistatic void 1450bf215546Sopenharmony_cicrocus_set_blend_color(struct pipe_context *ctx, 1451bf215546Sopenharmony_ci const struct pipe_blend_color *state) 1452bf215546Sopenharmony_ci{ 1453bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 1454bf215546Sopenharmony_ci 1455bf215546Sopenharmony_ci /* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */ 1456bf215546Sopenharmony_ci memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color)); 1457bf215546Sopenharmony_ci#if GFX_VER <= 5 1458bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN4_CONSTANT_COLOR; 1459bf215546Sopenharmony_ci#else 1460bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE; 1461bf215546Sopenharmony_ci#endif 1462bf215546Sopenharmony_ci} 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci/** 1465bf215546Sopenharmony_ci * Gallium CSO for blend state (see pipe_blend_state). 
1466bf215546Sopenharmony_ci */ 1467bf215546Sopenharmony_cistruct crocus_blend_state { 1468bf215546Sopenharmony_ci#if GFX_VER == 8 1469bf215546Sopenharmony_ci /** Partial 3DSTATE_PS_BLEND */ 1470bf215546Sopenharmony_ci uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)]; 1471bf215546Sopenharmony_ci#endif 1472bf215546Sopenharmony_ci 1473bf215546Sopenharmony_ci /** copy of BLEND_STATE */ 1474bf215546Sopenharmony_ci struct pipe_blend_state cso; 1475bf215546Sopenharmony_ci 1476bf215546Sopenharmony_ci /** Bitfield of whether blending is enabled for RT[i] - for aux resolves */ 1477bf215546Sopenharmony_ci uint8_t blend_enables; 1478bf215546Sopenharmony_ci 1479bf215546Sopenharmony_ci /** Bitfield of whether color writes are enabled for RT[i] */ 1480bf215546Sopenharmony_ci uint8_t color_write_enables; 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci /** Does RT[0] use dual color blending? */ 1483bf215546Sopenharmony_ci bool dual_color_blending; 1484bf215546Sopenharmony_ci}; 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_cistatic enum pipe_blendfactor 1487bf215546Sopenharmony_cifix_blendfactor(enum pipe_blendfactor f, bool alpha_to_one) 1488bf215546Sopenharmony_ci{ 1489bf215546Sopenharmony_ci if (alpha_to_one) { 1490bf215546Sopenharmony_ci if (f == PIPE_BLENDFACTOR_SRC1_ALPHA) 1491bf215546Sopenharmony_ci return PIPE_BLENDFACTOR_ONE; 1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_ci if (f == PIPE_BLENDFACTOR_INV_SRC1_ALPHA) 1494bf215546Sopenharmony_ci return PIPE_BLENDFACTOR_ZERO; 1495bf215546Sopenharmony_ci } 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_ci return f; 1498bf215546Sopenharmony_ci} 1499bf215546Sopenharmony_ci 1500bf215546Sopenharmony_ci#if GFX_VER >= 6 1501bf215546Sopenharmony_citypedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML; 1502bf215546Sopenharmony_ci#else 1503bf215546Sopenharmony_citypedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML; 1504bf215546Sopenharmony_ci#endif 1505bf215546Sopenharmony_ci 
1506bf215546Sopenharmony_cistatic bool 1507bf215546Sopenharmony_cican_emit_logic_op(struct crocus_context *ice) 1508bf215546Sopenharmony_ci{ 1509bf215546Sopenharmony_ci /* all pre gen8 have logicop restricted to unorm */ 1510bf215546Sopenharmony_ci enum pipe_format pformat = PIPE_FORMAT_NONE; 1511bf215546Sopenharmony_ci for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) { 1512bf215546Sopenharmony_ci if (ice->state.framebuffer.cbufs[i]) { 1513bf215546Sopenharmony_ci pformat = ice->state.framebuffer.cbufs[i]->format; 1514bf215546Sopenharmony_ci break; 1515bf215546Sopenharmony_ci } 1516bf215546Sopenharmony_ci } 1517bf215546Sopenharmony_ci return (pformat == PIPE_FORMAT_NONE || util_format_is_unorm(pformat)); 1518bf215546Sopenharmony_ci} 1519bf215546Sopenharmony_ci 1520bf215546Sopenharmony_cistatic bool 1521bf215546Sopenharmony_ciset_blend_entry_bits(struct crocus_batch *batch, BLEND_ENTRY_GENXML *entry, 1522bf215546Sopenharmony_ci struct crocus_blend_state *cso_blend, 1523bf215546Sopenharmony_ci int idx) 1524bf215546Sopenharmony_ci{ 1525bf215546Sopenharmony_ci struct crocus_context *ice = batch->ice; 1526bf215546Sopenharmony_ci bool independent_alpha_blend = false; 1527bf215546Sopenharmony_ci const struct pipe_rt_blend_state *rt = 1528bf215546Sopenharmony_ci &cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? 
idx : 0]; 1529bf215546Sopenharmony_ci const unsigned blend_enabled = rt->blend_enable; 1530bf215546Sopenharmony_ci 1531bf215546Sopenharmony_ci enum pipe_blendfactor src_rgb = 1532bf215546Sopenharmony_ci fix_blendfactor(rt->rgb_src_factor, cso_blend->cso.alpha_to_one); 1533bf215546Sopenharmony_ci enum pipe_blendfactor src_alpha = 1534bf215546Sopenharmony_ci fix_blendfactor(rt->alpha_src_factor, cso_blend->cso.alpha_to_one); 1535bf215546Sopenharmony_ci enum pipe_blendfactor dst_rgb = 1536bf215546Sopenharmony_ci fix_blendfactor(rt->rgb_dst_factor, cso_blend->cso.alpha_to_one); 1537bf215546Sopenharmony_ci enum pipe_blendfactor dst_alpha = 1538bf215546Sopenharmony_ci fix_blendfactor(rt->alpha_dst_factor, cso_blend->cso.alpha_to_one); 1539bf215546Sopenharmony_ci 1540bf215546Sopenharmony_ci if (rt->rgb_func != rt->alpha_func || 1541bf215546Sopenharmony_ci src_rgb != src_alpha || dst_rgb != dst_alpha) 1542bf215546Sopenharmony_ci independent_alpha_blend = true; 1543bf215546Sopenharmony_ci if (cso_blend->cso.logicop_enable) { 1544bf215546Sopenharmony_ci if (GFX_VER >= 8 || can_emit_logic_op(ice)) { 1545bf215546Sopenharmony_ci entry->LogicOpEnable = cso_blend->cso.logicop_enable; 1546bf215546Sopenharmony_ci entry->LogicOpFunction = cso_blend->cso.logicop_func; 1547bf215546Sopenharmony_ci } 1548bf215546Sopenharmony_ci } else if (blend_enabled) { 1549bf215546Sopenharmony_ci if (idx == 0) { 1550bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT]; 1551bf215546Sopenharmony_ci struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; 1552bf215546Sopenharmony_ci entry->ColorBufferBlendEnable = 1553bf215546Sopenharmony_ci (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend); 1554bf215546Sopenharmony_ci } else 1555bf215546Sopenharmony_ci entry->ColorBufferBlendEnable = 1; 1556bf215546Sopenharmony_ci 1557bf215546Sopenharmony_ci entry->ColorBlendFunction = rt->rgb_func; 1558bf215546Sopenharmony_ci 
entry->AlphaBlendFunction = rt->alpha_func; 1559bf215546Sopenharmony_ci entry->SourceBlendFactor = (int) src_rgb; 1560bf215546Sopenharmony_ci entry->SourceAlphaBlendFactor = (int) src_alpha; 1561bf215546Sopenharmony_ci entry->DestinationBlendFactor = (int) dst_rgb; 1562bf215546Sopenharmony_ci entry->DestinationAlphaBlendFactor = (int) dst_alpha; 1563bf215546Sopenharmony_ci } 1564bf215546Sopenharmony_ci#if GFX_VER <= 5 1565bf215546Sopenharmony_ci /* 1566bf215546Sopenharmony_ci * Gen4/GM45/ILK can't handle have ColorBufferBlendEnable == 0 1567bf215546Sopenharmony_ci * when a dual src blend shader is in use. Setup dummy blending. 1568bf215546Sopenharmony_ci */ 1569bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT]; 1570bf215546Sopenharmony_ci struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; 1571bf215546Sopenharmony_ci if (idx == 0 && !blend_enabled && wm_prog_data->dual_src_blend) { 1572bf215546Sopenharmony_ci entry->ColorBufferBlendEnable = 1; 1573bf215546Sopenharmony_ci entry->ColorBlendFunction = PIPE_BLEND_ADD; 1574bf215546Sopenharmony_ci entry->AlphaBlendFunction = PIPE_BLEND_ADD; 1575bf215546Sopenharmony_ci entry->SourceBlendFactor = PIPE_BLENDFACTOR_ONE; 1576bf215546Sopenharmony_ci entry->SourceAlphaBlendFactor = PIPE_BLENDFACTOR_ONE; 1577bf215546Sopenharmony_ci entry->DestinationBlendFactor = PIPE_BLENDFACTOR_ZERO; 1578bf215546Sopenharmony_ci entry->DestinationAlphaBlendFactor = PIPE_BLENDFACTOR_ZERO; 1579bf215546Sopenharmony_ci } 1580bf215546Sopenharmony_ci#endif 1581bf215546Sopenharmony_ci return independent_alpha_blend; 1582bf215546Sopenharmony_ci} 1583bf215546Sopenharmony_ci 1584bf215546Sopenharmony_ci/** 1585bf215546Sopenharmony_ci * The pipe->create_blend_state() driver hook. 1586bf215546Sopenharmony_ci * 1587bf215546Sopenharmony_ci * Translates a pipe_blend_state into crocus_blend_state. 
1588bf215546Sopenharmony_ci */ 1589bf215546Sopenharmony_cistatic void * 1590bf215546Sopenharmony_cicrocus_create_blend_state(struct pipe_context *ctx, 1591bf215546Sopenharmony_ci const struct pipe_blend_state *state) 1592bf215546Sopenharmony_ci{ 1593bf215546Sopenharmony_ci struct crocus_blend_state *cso = malloc(sizeof(struct crocus_blend_state)); 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci cso->blend_enables = 0; 1596bf215546Sopenharmony_ci cso->color_write_enables = 0; 1597bf215546Sopenharmony_ci STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS <= 8); 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci cso->cso = *state; 1600bf215546Sopenharmony_ci cso->dual_color_blending = util_blend_state_is_dual(state, 0); 1601bf215546Sopenharmony_ci 1602bf215546Sopenharmony_ci#if GFX_VER == 8 1603bf215546Sopenharmony_ci bool indep_alpha_blend = false; 1604bf215546Sopenharmony_ci#endif 1605bf215546Sopenharmony_ci for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) { 1606bf215546Sopenharmony_ci const struct pipe_rt_blend_state *rt = 1607bf215546Sopenharmony_ci &state->rt[state->independent_blend_enable ? 
i : 0]; 1608bf215546Sopenharmony_ci if (rt->blend_enable) 1609bf215546Sopenharmony_ci cso->blend_enables |= 1u << i; 1610bf215546Sopenharmony_ci if (rt->colormask) 1611bf215546Sopenharmony_ci cso->color_write_enables |= 1u << i; 1612bf215546Sopenharmony_ci#if GFX_VER == 8 1613bf215546Sopenharmony_ci enum pipe_blendfactor src_rgb = 1614bf215546Sopenharmony_ci fix_blendfactor(rt->rgb_src_factor, state->alpha_to_one); 1615bf215546Sopenharmony_ci enum pipe_blendfactor src_alpha = 1616bf215546Sopenharmony_ci fix_blendfactor(rt->alpha_src_factor, state->alpha_to_one); 1617bf215546Sopenharmony_ci enum pipe_blendfactor dst_rgb = 1618bf215546Sopenharmony_ci fix_blendfactor(rt->rgb_dst_factor, state->alpha_to_one); 1619bf215546Sopenharmony_ci enum pipe_blendfactor dst_alpha = 1620bf215546Sopenharmony_ci fix_blendfactor(rt->alpha_dst_factor, state->alpha_to_one); 1621bf215546Sopenharmony_ci 1622bf215546Sopenharmony_ci if (rt->rgb_func != rt->alpha_func || 1623bf215546Sopenharmony_ci src_rgb != src_alpha || dst_rgb != dst_alpha) 1624bf215546Sopenharmony_ci indep_alpha_blend = true; 1625bf215546Sopenharmony_ci#endif 1626bf215546Sopenharmony_ci } 1627bf215546Sopenharmony_ci 1628bf215546Sopenharmony_ci#if GFX_VER == 8 1629bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) { 1630bf215546Sopenharmony_ci /* pb.HasWriteableRT is filled in at draw time. 1631bf215546Sopenharmony_ci * pb.AlphaTestEnable is filled in at draw time. 1632bf215546Sopenharmony_ci * 1633bf215546Sopenharmony_ci * pb.ColorBufferBlendEnable is filled in at draw time so we can avoid 1634bf215546Sopenharmony_ci * setting it when dual color blending without an appropriate shader. 
1635bf215546Sopenharmony_ci */ 1636bf215546Sopenharmony_ci 1637bf215546Sopenharmony_ci pb.AlphaToCoverageEnable = state->alpha_to_coverage; 1638bf215546Sopenharmony_ci pb.IndependentAlphaBlendEnable = indep_alpha_blend; 1639bf215546Sopenharmony_ci 1640bf215546Sopenharmony_ci /* The casts prevent warnings about implicit enum type conversions. */ 1641bf215546Sopenharmony_ci pb.SourceBlendFactor = 1642bf215546Sopenharmony_ci (int) fix_blendfactor(state->rt[0].rgb_src_factor, state->alpha_to_one); 1643bf215546Sopenharmony_ci pb.SourceAlphaBlendFactor = 1644bf215546Sopenharmony_ci (int) fix_blendfactor(state->rt[0].alpha_src_factor, state->alpha_to_one); 1645bf215546Sopenharmony_ci pb.DestinationBlendFactor = 1646bf215546Sopenharmony_ci (int) fix_blendfactor(state->rt[0].rgb_dst_factor, state->alpha_to_one); 1647bf215546Sopenharmony_ci pb.DestinationAlphaBlendFactor = 1648bf215546Sopenharmony_ci (int) fix_blendfactor(state->rt[0].alpha_dst_factor, state->alpha_to_one); 1649bf215546Sopenharmony_ci } 1650bf215546Sopenharmony_ci#endif 1651bf215546Sopenharmony_ci return cso; 1652bf215546Sopenharmony_ci} 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci/** 1655bf215546Sopenharmony_ci * The pipe->bind_blend_state() driver hook. 1656bf215546Sopenharmony_ci * 1657bf215546Sopenharmony_ci * Bind a blending CSO and flag related dirty bits. 1658bf215546Sopenharmony_ci */ 1659bf215546Sopenharmony_cistatic void 1660bf215546Sopenharmony_cicrocus_bind_blend_state(struct pipe_context *ctx, void *state) 1661bf215546Sopenharmony_ci{ 1662bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 1663bf215546Sopenharmony_ci struct crocus_blend_state *cso = state; 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_ci ice->state.cso_blend = cso; 1666bf215546Sopenharmony_ci ice->state.blend_enables = cso ? 
cso->blend_enables : 0; 1667bf215546Sopenharmony_ci 1668bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS; 1669bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_WM; 1670bf215546Sopenharmony_ci#if GFX_VER >= 6 1671bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE; 1672bf215546Sopenharmony_ci#endif 1673bf215546Sopenharmony_ci#if GFX_VER >= 7 1674bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS; 1675bf215546Sopenharmony_ci#endif 1676bf215546Sopenharmony_ci#if GFX_VER == 8 1677bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX; 1678bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND; 1679bf215546Sopenharmony_ci#endif 1680bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE; 1681bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 1682bf215546Sopenharmony_ci ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_BLEND]; 1683bf215546Sopenharmony_ci} 1684bf215546Sopenharmony_ci 1685bf215546Sopenharmony_ci/** 1686bf215546Sopenharmony_ci * Return true if the FS writes to any color outputs which are not disabled 1687bf215546Sopenharmony_ci * via color masking. 
1688bf215546Sopenharmony_ci */ 1689bf215546Sopenharmony_cistatic bool 1690bf215546Sopenharmony_cihas_writeable_rt(const struct crocus_blend_state *cso_blend, 1691bf215546Sopenharmony_ci const struct shader_info *fs_info) 1692bf215546Sopenharmony_ci{ 1693bf215546Sopenharmony_ci if (!fs_info) 1694bf215546Sopenharmony_ci return false; 1695bf215546Sopenharmony_ci 1696bf215546Sopenharmony_ci unsigned rt_outputs = fs_info->outputs_written >> FRAG_RESULT_DATA0; 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_ci if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) 1699bf215546Sopenharmony_ci rt_outputs = (1 << BRW_MAX_DRAW_BUFFERS) - 1; 1700bf215546Sopenharmony_ci 1701bf215546Sopenharmony_ci return cso_blend->color_write_enables & rt_outputs; 1702bf215546Sopenharmony_ci} 1703bf215546Sopenharmony_ci 1704bf215546Sopenharmony_ci/** 1705bf215546Sopenharmony_ci * Gallium CSO for depth, stencil, and alpha testing state. 1706bf215546Sopenharmony_ci */ 1707bf215546Sopenharmony_cistruct crocus_depth_stencil_alpha_state { 1708bf215546Sopenharmony_ci struct pipe_depth_stencil_alpha_state cso; 1709bf215546Sopenharmony_ci 1710bf215546Sopenharmony_ci bool depth_writes_enabled; 1711bf215546Sopenharmony_ci bool stencil_writes_enabled; 1712bf215546Sopenharmony_ci}; 1713bf215546Sopenharmony_ci 1714bf215546Sopenharmony_ci/** 1715bf215546Sopenharmony_ci * The pipe->create_depth_stencil_alpha_state() driver hook. 1716bf215546Sopenharmony_ci * 1717bf215546Sopenharmony_ci * We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha 1718bf215546Sopenharmony_ci * testing state since we need pieces of it in a variety of places. 
1719bf215546Sopenharmony_ci */ 1720bf215546Sopenharmony_cistatic void * 1721bf215546Sopenharmony_cicrocus_create_zsa_state(struct pipe_context *ctx, 1722bf215546Sopenharmony_ci const struct pipe_depth_stencil_alpha_state *state) 1723bf215546Sopenharmony_ci{ 1724bf215546Sopenharmony_ci struct crocus_depth_stencil_alpha_state *cso = 1725bf215546Sopenharmony_ci malloc(sizeof(struct crocus_depth_stencil_alpha_state)); 1726bf215546Sopenharmony_ci 1727bf215546Sopenharmony_ci bool two_sided_stencil = state->stencil[1].enabled; 1728bf215546Sopenharmony_ci cso->cso = *state; 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci cso->depth_writes_enabled = state->depth_writemask; 1731bf215546Sopenharmony_ci cso->stencil_writes_enabled = 1732bf215546Sopenharmony_ci state->stencil[0].writemask != 0 || 1733bf215546Sopenharmony_ci (two_sided_stencil && state->stencil[1].writemask != 0); 1734bf215546Sopenharmony_ci 1735bf215546Sopenharmony_ci /* The state tracker needs to optimize away EQUAL writes for us. */ 1736bf215546Sopenharmony_ci assert(!(state->depth_func == PIPE_FUNC_EQUAL && state->depth_writemask)); 1737bf215546Sopenharmony_ci 1738bf215546Sopenharmony_ci return cso; 1739bf215546Sopenharmony_ci} 1740bf215546Sopenharmony_ci 1741bf215546Sopenharmony_ci/** 1742bf215546Sopenharmony_ci * The pipe->bind_depth_stencil_alpha_state() driver hook. 1743bf215546Sopenharmony_ci * 1744bf215546Sopenharmony_ci * Bind a depth/stencil/alpha CSO and flag related dirty bits. 
1745bf215546Sopenharmony_ci */ 1746bf215546Sopenharmony_cistatic void 1747bf215546Sopenharmony_cicrocus_bind_zsa_state(struct pipe_context *ctx, void *state) 1748bf215546Sopenharmony_ci{ 1749bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 1750bf215546Sopenharmony_ci struct crocus_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa; 1751bf215546Sopenharmony_ci struct crocus_depth_stencil_alpha_state *new_cso = state; 1752bf215546Sopenharmony_ci 1753bf215546Sopenharmony_ci if (new_cso) { 1754bf215546Sopenharmony_ci if (cso_changed(cso.alpha_ref_value)) 1755bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE; 1756bf215546Sopenharmony_ci 1757bf215546Sopenharmony_ci if (cso_changed(cso.alpha_enabled)) 1758bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_WM; 1759bf215546Sopenharmony_ci#if GFX_VER >= 6 1760bf215546Sopenharmony_ci if (cso_changed(cso.alpha_enabled)) 1761bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE; 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci if (cso_changed(cso.alpha_func)) 1764bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE; 1765bf215546Sopenharmony_ci#endif 1766bf215546Sopenharmony_ci#if GFX_VER == 8 1767bf215546Sopenharmony_ci if (cso_changed(cso.alpha_enabled)) 1768bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND; 1769bf215546Sopenharmony_ci#endif 1770bf215546Sopenharmony_ci 1771bf215546Sopenharmony_ci if (cso_changed(depth_writes_enabled)) 1772bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 1773bf215546Sopenharmony_ci 1774bf215546Sopenharmony_ci ice->state.depth_writes_enabled = new_cso->depth_writes_enabled; 1775bf215546Sopenharmony_ci ice->state.stencil_writes_enabled = new_cso->stencil_writes_enabled; 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci#if GFX_VER <= 5 1778bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE; 
1779bf215546Sopenharmony_ci#endif 1780bf215546Sopenharmony_ci } 1781bf215546Sopenharmony_ci 1782bf215546Sopenharmony_ci ice->state.cso_zsa = new_cso; 1783bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT; 1784bf215546Sopenharmony_ci#if GFX_VER >= 6 1785bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL; 1786bf215546Sopenharmony_ci#endif 1787bf215546Sopenharmony_ci#if GFX_VER == 8 1788bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX; 1789bf215546Sopenharmony_ci#endif 1790bf215546Sopenharmony_ci ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_DEPTH_STENCIL_ALPHA]; 1791bf215546Sopenharmony_ci} 1792bf215546Sopenharmony_ci 1793bf215546Sopenharmony_ci#if GFX_VER == 8 1794bf215546Sopenharmony_cistatic bool 1795bf215546Sopenharmony_ciwant_pma_fix(struct crocus_context *ice) 1796bf215546Sopenharmony_ci{ 1797bf215546Sopenharmony_ci UNUSED struct crocus_screen *screen = (void *) ice->ctx.screen; 1798bf215546Sopenharmony_ci UNUSED const struct intel_device_info *devinfo = &screen->devinfo; 1799bf215546Sopenharmony_ci const struct brw_wm_prog_data *wm_prog_data = (void *) 1800bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 1801bf215546Sopenharmony_ci const struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 1802bf215546Sopenharmony_ci const struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 1803bf215546Sopenharmony_ci const struct crocus_blend_state *cso_blend = ice->state.cso_blend; 1804bf215546Sopenharmony_ci 1805bf215546Sopenharmony_ci /* In very specific combinations of state, we can instruct Gfx8-9 hardware 1806bf215546Sopenharmony_ci * to avoid stalling at the pixel mask array. 
The state equations are 1807bf215546Sopenharmony_ci * documented in these places: 1808bf215546Sopenharmony_ci * 1809bf215546Sopenharmony_ci * - Gfx8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE 1810bf215546Sopenharmony_ci * - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable 1811bf215546Sopenharmony_ci * 1812bf215546Sopenharmony_ci * Both equations share some common elements: 1813bf215546Sopenharmony_ci * 1814bf215546Sopenharmony_ci * no_hiz_op = 1815bf215546Sopenharmony_ci * !(3DSTATE_WM_HZ_OP::DepthBufferClear || 1816bf215546Sopenharmony_ci * 3DSTATE_WM_HZ_OP::DepthBufferResolve || 1817bf215546Sopenharmony_ci * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || 1818bf215546Sopenharmony_ci * 3DSTATE_WM_HZ_OP::StencilBufferClear) && 1819bf215546Sopenharmony_ci * 1820bf215546Sopenharmony_ci * killpixels = 1821bf215546Sopenharmony_ci * 3DSTATE_WM::ForceKillPix != ForceOff && 1822bf215546Sopenharmony_ci * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 1823bf215546Sopenharmony_ci * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 1824bf215546Sopenharmony_ci * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 1825bf215546Sopenharmony_ci * 3DSTATE_PS_BLEND::AlphaTestEnable || 1826bf215546Sopenharmony_ci * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) 1827bf215546Sopenharmony_ci * 1828bf215546Sopenharmony_ci * (Technically the stencil PMA treats ForceKillPix differently, 1829bf215546Sopenharmony_ci * but I think this is a documentation oversight, and we don't 1830bf215546Sopenharmony_ci * ever use it in this way, so it doesn't matter). 
1831bf215546Sopenharmony_ci * 1832bf215546Sopenharmony_ci * common_pma_fix = 1833bf215546Sopenharmony_ci * 3DSTATE_WM::ForceThreadDispatch != 1 && 1834bf215546Sopenharmony_ci * 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 && 1835bf215546Sopenharmony_ci * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && 1836bf215546Sopenharmony_ci * 3DSTATE_DEPTH_BUFFER::HIZ Enable && 1837bf215546Sopenharmony_ci * 3DSTATE_WM::EDSC_Mode != EDSC_PREPS && 1838bf215546Sopenharmony_ci * 3DSTATE_PS_EXTRA::PixelShaderValid && 1839bf215546Sopenharmony_ci * no_hiz_op 1840bf215546Sopenharmony_ci * 1841bf215546Sopenharmony_ci * These are always true: 1842bf215546Sopenharmony_ci * 1843bf215546Sopenharmony_ci * 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 1844bf215546Sopenharmony_ci * 3DSTATE_PS_EXTRA::PixelShaderValid 1845bf215546Sopenharmony_ci * 1846bf215546Sopenharmony_ci * Also, we never use the normal drawing path for HiZ ops; these are true: 1847bf215546Sopenharmony_ci * 1848bf215546Sopenharmony_ci * !(3DSTATE_WM_HZ_OP::DepthBufferClear || 1849bf215546Sopenharmony_ci * 3DSTATE_WM_HZ_OP::DepthBufferResolve || 1850bf215546Sopenharmony_ci * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || 1851bf215546Sopenharmony_ci * 3DSTATE_WM_HZ_OP::StencilBufferClear) 1852bf215546Sopenharmony_ci * 1853bf215546Sopenharmony_ci * This happens sometimes: 1854bf215546Sopenharmony_ci * 1855bf215546Sopenharmony_ci * 3DSTATE_WM::ForceThreadDispatch != 1 1856bf215546Sopenharmony_ci * 1857bf215546Sopenharmony_ci * However, we choose to ignore it as it either agrees with the signal 1858bf215546Sopenharmony_ci * (dispatch was already enabled, so nothing out of the ordinary), or 1859bf215546Sopenharmony_ci * there are no framebuffer attachments (so no depth or HiZ anyway, 1860bf215546Sopenharmony_ci * meaning the PMA signal will already be disabled). 
1861bf215546Sopenharmony_ci */ 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_ci if (!cso_fb->zsbuf) 1864bf215546Sopenharmony_ci return false; 1865bf215546Sopenharmony_ci 1866bf215546Sopenharmony_ci struct crocus_resource *zres, *sres; 1867bf215546Sopenharmony_ci crocus_get_depth_stencil_resources(devinfo, 1868bf215546Sopenharmony_ci cso_fb->zsbuf->texture, &zres, &sres); 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci /* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && 1871bf215546Sopenharmony_ci * 3DSTATE_DEPTH_BUFFER::HIZ Enable && 1872bf215546Sopenharmony_ci */ 1873bf215546Sopenharmony_ci if (!zres || !crocus_resource_level_has_hiz(zres, cso_fb->zsbuf->u.tex.level)) 1874bf215546Sopenharmony_ci return false; 1875bf215546Sopenharmony_ci 1876bf215546Sopenharmony_ci /* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */ 1877bf215546Sopenharmony_ci if (wm_prog_data->early_fragment_tests) 1878bf215546Sopenharmony_ci return false; 1879bf215546Sopenharmony_ci 1880bf215546Sopenharmony_ci /* 3DSTATE_WM::ForceKillPix != ForceOff && 1881bf215546Sopenharmony_ci * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 1882bf215546Sopenharmony_ci * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 1883bf215546Sopenharmony_ci * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 1884bf215546Sopenharmony_ci * 3DSTATE_PS_BLEND::AlphaTestEnable || 1885bf215546Sopenharmony_ci * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) 1886bf215546Sopenharmony_ci */ 1887bf215546Sopenharmony_ci bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask || 1888bf215546Sopenharmony_ci cso_blend->cso.alpha_to_coverage || cso_zsa->cso.alpha_enabled; 1889bf215546Sopenharmony_ci 1890bf215546Sopenharmony_ci /* The Gfx8 depth PMA equation becomes: 1891bf215546Sopenharmony_ci * 1892bf215546Sopenharmony_ci * depth_writes = 1893bf215546Sopenharmony_ci * 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && 1894bf215546Sopenharmony_ci * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE 1895bf215546Sopenharmony_ci * 
1896bf215546Sopenharmony_ci * stencil_writes = 1897bf215546Sopenharmony_ci * 3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && 1898bf215546Sopenharmony_ci * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && 1899bf215546Sopenharmony_ci * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE 1900bf215546Sopenharmony_ci * 1901bf215546Sopenharmony_ci * Z_PMA_OPT = 1902bf215546Sopenharmony_ci * common_pma_fix && 1903bf215546Sopenharmony_ci * 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable && 1904bf215546Sopenharmony_ci * ((killpixels && (depth_writes || stencil_writes)) || 1905bf215546Sopenharmony_ci * 3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF) 1906bf215546Sopenharmony_ci * 1907bf215546Sopenharmony_ci */ 1908bf215546Sopenharmony_ci if (!cso_zsa->cso.depth_enabled) 1909bf215546Sopenharmony_ci return false; 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_ci return wm_prog_data->computed_depth_mode != PSCDEPTH_OFF || 1912bf215546Sopenharmony_ci (killpixels && (cso_zsa->depth_writes_enabled || 1913bf215546Sopenharmony_ci (sres && cso_zsa->stencil_writes_enabled))); 1914bf215546Sopenharmony_ci} 1915bf215546Sopenharmony_ci#endif 1916bf215546Sopenharmony_civoid 1917bf215546Sopenharmony_cigenX(crocus_update_pma_fix)(struct crocus_context *ice, 1918bf215546Sopenharmony_ci struct crocus_batch *batch, 1919bf215546Sopenharmony_ci bool enable) 1920bf215546Sopenharmony_ci{ 1921bf215546Sopenharmony_ci#if GFX_VER == 8 1922bf215546Sopenharmony_ci struct crocus_genx_state *genx = ice->state.genx; 1923bf215546Sopenharmony_ci 1924bf215546Sopenharmony_ci if (genx->pma_fix_enabled == enable) 1925bf215546Sopenharmony_ci return; 1926bf215546Sopenharmony_ci 1927bf215546Sopenharmony_ci genx->pma_fix_enabled = enable; 1928bf215546Sopenharmony_ci 1929bf215546Sopenharmony_ci /* According to the Broadwell PIPE_CONTROL documentation, software should 1930bf215546Sopenharmony_ci * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set 1931bf215546Sopenharmony_ci * prior to 
the LRI. If stencil buffer writes are enabled, then a Render * Cache Flush is also necessary. 1932bf215546Sopenharmony_ci * 1933bf215546Sopenharmony_ci * The Gfx9 docs say to use a depth stall rather than a command streamer 1934bf215546Sopenharmony_ci * stall. However, the hardware seems to violently disagree. A full 1935bf215546Sopenharmony_ci * command streamer stall seems to be needed in both cases. 1936bf215546Sopenharmony_ci */ 1937bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)", 1938bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL | 1939bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1940bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH); 1941bf215546Sopenharmony_ci 1942bf215546Sopenharmony_ci crocus_emit_reg(batch, GENX(CACHE_MODE_1), reg) { 1943bf215546Sopenharmony_ci reg.NPPMAFixEnable = enable; 1944bf215546Sopenharmony_ci reg.NPEarlyZFailsDisable = enable; 1945bf215546Sopenharmony_ci reg.NPPMAFixEnableMask = true; 1946bf215546Sopenharmony_ci reg.NPEarlyZFailsDisableMask = true; 1947bf215546Sopenharmony_ci } 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_ci /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache 1950bf215546Sopenharmony_ci * Flush bits is often necessary. We do it regardless because it's easier. 1951bf215546Sopenharmony_ci * The render cache flush is also necessary if stencil writes are enabled. 1952bf215546Sopenharmony_ci * 1953bf215546Sopenharmony_ci * Again, the Gfx9 docs give a different set of flushes but the Broadwell 1954bf215546Sopenharmony_ci * flushes seem to work just as well. 
1955bf215546Sopenharmony_ci */ 1956bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)", 1957bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_STALL | 1958bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1959bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH); 1960bf215546Sopenharmony_ci#endif 1961bf215546Sopenharmony_ci} 1962bf215546Sopenharmony_ci 1963bf215546Sopenharmony_cistatic float 1964bf215546Sopenharmony_ciget_line_width(const struct pipe_rasterizer_state *state) 1965bf215546Sopenharmony_ci{ 1966bf215546Sopenharmony_ci float line_width = state->line_width; 1967bf215546Sopenharmony_ci 1968bf215546Sopenharmony_ci /* From the OpenGL 4.4 spec: 1969bf215546Sopenharmony_ci * 1970bf215546Sopenharmony_ci * "The actual width of non-antialiased lines is determined by rounding 1971bf215546Sopenharmony_ci * the supplied width to the nearest integer, then clamping it to the 1972bf215546Sopenharmony_ci * implementation-dependent maximum non-antialiased line width." 1973bf215546Sopenharmony_ci */ 1974bf215546Sopenharmony_ci if (!state->multisample && !state->line_smooth) 1975bf215546Sopenharmony_ci line_width = roundf(state->line_width); 1976bf215546Sopenharmony_ci 1977bf215546Sopenharmony_ci if (!state->multisample && state->line_smooth && line_width < 1.5f) { 1978bf215546Sopenharmony_ci /* For 1 pixel line thickness or less, the general anti-aliasing 1979bf215546Sopenharmony_ci * algorithm gives up, and a garbage line is generated. Setting a 1980bf215546Sopenharmony_ci * Line Width of 0.0 specifies the rasterization of the "thinnest" 1981bf215546Sopenharmony_ci * (one-pixel-wide), non-antialiased lines. 1982bf215546Sopenharmony_ci * 1983bf215546Sopenharmony_ci * Lines rendered with zero Line Width are rasterized using the 1984bf215546Sopenharmony_ci * "Grid Intersection Quantization" rules as specified by the 1985bf215546Sopenharmony_ci * "Zero-Width (Cosmetic) Line Rasterization" section of the docs. 
1986bf215546Sopenharmony_ci */ 1987bf215546Sopenharmony_ci /* hack around this for gfx4/5 fps counters in hud. */ 1988bf215546Sopenharmony_ci line_width = GFX_VER < 6 ? 1.5f : 0.0f; 1989bf215546Sopenharmony_ci } 1990bf215546Sopenharmony_ci return line_width; 1991bf215546Sopenharmony_ci} 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci/** 1994bf215546Sopenharmony_ci * The pipe->create_rasterizer_state() driver hook. 1995bf215546Sopenharmony_ci */ 1996bf215546Sopenharmony_cistatic void * 1997bf215546Sopenharmony_cicrocus_create_rasterizer_state(struct pipe_context *ctx, 1998bf215546Sopenharmony_ci const struct pipe_rasterizer_state *state) 1999bf215546Sopenharmony_ci{ 2000bf215546Sopenharmony_ci struct crocus_rasterizer_state *cso = 2001bf215546Sopenharmony_ci malloc(sizeof(struct crocus_rasterizer_state)); 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_ci cso->fill_mode_point_or_line = 2004bf215546Sopenharmony_ci state->fill_front == PIPE_POLYGON_MODE_LINE || 2005bf215546Sopenharmony_ci state->fill_front == PIPE_POLYGON_MODE_POINT || 2006bf215546Sopenharmony_ci state->fill_back == PIPE_POLYGON_MODE_LINE || 2007bf215546Sopenharmony_ci state->fill_back == PIPE_POLYGON_MODE_POINT; 2008bf215546Sopenharmony_ci 2009bf215546Sopenharmony_ci if (state->clip_plane_enable != 0) 2010bf215546Sopenharmony_ci cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1; 2011bf215546Sopenharmony_ci else 2012bf215546Sopenharmony_ci cso->num_clip_plane_consts = 0; 2013bf215546Sopenharmony_ci 2014bf215546Sopenharmony_ci cso->cso = *state; 2015bf215546Sopenharmony_ci 2016bf215546Sopenharmony_ci#if GFX_VER >= 6 2017bf215546Sopenharmony_ci float line_width = get_line_width(state); 2018bf215546Sopenharmony_ci 2019bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_SF), cso->sf, sf) { 2020bf215546Sopenharmony_ci sf.StatisticsEnable = true; 2021bf215546Sopenharmony_ci sf.AALineDistanceMode = AALINEDISTANCE_TRUE; 2022bf215546Sopenharmony_ci 
sf.LineEndCapAntialiasingRegionWidth = 2023bf215546Sopenharmony_ci state->line_smooth ? _10pixels : _05pixels; 2024bf215546Sopenharmony_ci sf.LastPixelEnable = state->line_last_pixel; 2025bf215546Sopenharmony_ci#if GFX_VER <= 7 2026bf215546Sopenharmony_ci sf.AntialiasingEnable = state->line_smooth; 2027bf215546Sopenharmony_ci#endif 2028bf215546Sopenharmony_ci#if GFX_VER == 8 2029bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 2030bf215546Sopenharmony_ci if (screen->devinfo.platform == INTEL_PLATFORM_CHV) 2031bf215546Sopenharmony_ci sf.CHVLineWidth = line_width; 2032bf215546Sopenharmony_ci else 2033bf215546Sopenharmony_ci sf.LineWidth = line_width; 2034bf215546Sopenharmony_ci#else 2035bf215546Sopenharmony_ci sf.LineWidth = line_width; 2036bf215546Sopenharmony_ci#endif 2037bf215546Sopenharmony_ci sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State; 2038bf215546Sopenharmony_ci sf.PointWidth = state->point_size; 2039bf215546Sopenharmony_ci 2040bf215546Sopenharmony_ci if (state->flatshade_first) { 2041bf215546Sopenharmony_ci sf.TriangleFanProvokingVertexSelect = 1; 2042bf215546Sopenharmony_ci } else { 2043bf215546Sopenharmony_ci sf.TriangleStripListProvokingVertexSelect = 2; 2044bf215546Sopenharmony_ci sf.TriangleFanProvokingVertexSelect = 2; 2045bf215546Sopenharmony_ci sf.LineStripListProvokingVertexSelect = 1; 2046bf215546Sopenharmony_ci } 2047bf215546Sopenharmony_ci 2048bf215546Sopenharmony_ci#if GFX_VER == 6 2049bf215546Sopenharmony_ci sf.AttributeSwizzleEnable = true; 2050bf215546Sopenharmony_ci if (state->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) 2051bf215546Sopenharmony_ci sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT; 2052bf215546Sopenharmony_ci else 2053bf215546Sopenharmony_ci sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT; 2054bf215546Sopenharmony_ci#endif 2055bf215546Sopenharmony_ci 2056bf215546Sopenharmony_ci#if GFX_VER <= 7 2057bf215546Sopenharmony_ci sf.FrontWinding = 
state->front_ccw ? 1 : 0; // Or the other way... 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci#if GFX_VER >= 6 2060bf215546Sopenharmony_ci sf.GlobalDepthOffsetEnableSolid = state->offset_tri; 2061bf215546Sopenharmony_ci sf.GlobalDepthOffsetEnableWireframe = state->offset_line; 2062bf215546Sopenharmony_ci sf.GlobalDepthOffsetEnablePoint = state->offset_point; 2063bf215546Sopenharmony_ci sf.GlobalDepthOffsetConstant = state->offset_units * 2; 2064bf215546Sopenharmony_ci sf.GlobalDepthOffsetScale = state->offset_scale; 2065bf215546Sopenharmony_ci sf.GlobalDepthOffsetClamp = state->offset_clamp; 2066bf215546Sopenharmony_ci 2067bf215546Sopenharmony_ci sf.FrontFaceFillMode = translate_fill_mode(state->fill_front); 2068bf215546Sopenharmony_ci sf.BackFaceFillMode = translate_fill_mode(state->fill_back); 2069bf215546Sopenharmony_ci#endif 2070bf215546Sopenharmony_ci 2071bf215546Sopenharmony_ci sf.CullMode = translate_cull_mode(state->cull_face); 2072bf215546Sopenharmony_ci sf.ScissorRectangleEnable = true; 2073bf215546Sopenharmony_ci 2074bf215546Sopenharmony_ci#if GFX_VERx10 == 75 2075bf215546Sopenharmony_ci sf.LineStippleEnable = state->line_stipple_enable; 2076bf215546Sopenharmony_ci#endif 2077bf215546Sopenharmony_ci#endif 2078bf215546Sopenharmony_ci } 2079bf215546Sopenharmony_ci#endif 2080bf215546Sopenharmony_ci 2081bf215546Sopenharmony_ci#if GFX_VER == 8 2082bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) { 2083bf215546Sopenharmony_ci rr.FrontWinding = state->front_ccw ? 
CounterClockwise : Clockwise; 2084bf215546Sopenharmony_ci rr.CullMode = translate_cull_mode(state->cull_face); 2085bf215546Sopenharmony_ci rr.FrontFaceFillMode = translate_fill_mode(state->fill_front); 2086bf215546Sopenharmony_ci rr.BackFaceFillMode = translate_fill_mode(state->fill_back); 2087bf215546Sopenharmony_ci rr.DXMultisampleRasterizationEnable = state->multisample; 2088bf215546Sopenharmony_ci rr.GlobalDepthOffsetEnableSolid = state->offset_tri; 2089bf215546Sopenharmony_ci rr.GlobalDepthOffsetEnableWireframe = state->offset_line; 2090bf215546Sopenharmony_ci rr.GlobalDepthOffsetEnablePoint = state->offset_point; 2091bf215546Sopenharmony_ci rr.GlobalDepthOffsetConstant = state->offset_units * 2; 2092bf215546Sopenharmony_ci rr.GlobalDepthOffsetScale = state->offset_scale; 2093bf215546Sopenharmony_ci rr.GlobalDepthOffsetClamp = state->offset_clamp; 2094bf215546Sopenharmony_ci rr.SmoothPointEnable = state->point_smooth; 2095bf215546Sopenharmony_ci rr.AntialiasingEnable = state->line_smooth; 2096bf215546Sopenharmony_ci rr.ScissorRectangleEnable = state->scissor; 2097bf215546Sopenharmony_ci rr.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far); 2098bf215546Sopenharmony_ci } 2099bf215546Sopenharmony_ci#endif 2100bf215546Sopenharmony_ci 2101bf215546Sopenharmony_ci#if GFX_VER >= 6 2102bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) { 2103bf215546Sopenharmony_ci /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from 2104bf215546Sopenharmony_ci * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB. 2105bf215546Sopenharmony_ci */ 2106bf215546Sopenharmony_ci#if GFX_VER >= 7 2107bf215546Sopenharmony_ci cl.EarlyCullEnable = true; 2108bf215546Sopenharmony_ci#endif 2109bf215546Sopenharmony_ci 2110bf215546Sopenharmony_ci#if GFX_VER == 7 2111bf215546Sopenharmony_ci cl.FrontWinding = state->front_ccw ? 
1 : 0; 2112bf215546Sopenharmony_ci cl.CullMode = translate_cull_mode(state->cull_face); 2113bf215546Sopenharmony_ci#endif 2114bf215546Sopenharmony_ci cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable; 2115bf215546Sopenharmony_ci#if GFX_VER < 8 2116bf215546Sopenharmony_ci cl.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far); 2117bf215546Sopenharmony_ci#endif 2118bf215546Sopenharmony_ci cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL; 2119bf215546Sopenharmony_ci cl.GuardbandClipTestEnable = true; 2120bf215546Sopenharmony_ci cl.ClipEnable = true; 2121bf215546Sopenharmony_ci cl.MinimumPointWidth = 0.125; 2122bf215546Sopenharmony_ci cl.MaximumPointWidth = 255.875; 2123bf215546Sopenharmony_ci 2124bf215546Sopenharmony_ci#if GFX_VER == 8 2125bf215546Sopenharmony_ci cl.ForceUserClipDistanceClipTestEnableBitmask = true; 2126bf215546Sopenharmony_ci#endif 2127bf215546Sopenharmony_ci 2128bf215546Sopenharmony_ci if (state->flatshade_first) { 2129bf215546Sopenharmony_ci cl.TriangleFanProvokingVertexSelect = 1; 2130bf215546Sopenharmony_ci } else { 2131bf215546Sopenharmony_ci cl.TriangleStripListProvokingVertexSelect = 2; 2132bf215546Sopenharmony_ci cl.TriangleFanProvokingVertexSelect = 2; 2133bf215546Sopenharmony_ci cl.LineStripListProvokingVertexSelect = 1; 2134bf215546Sopenharmony_ci } 2135bf215546Sopenharmony_ci } 2136bf215546Sopenharmony_ci#endif 2137bf215546Sopenharmony_ci 2138bf215546Sopenharmony_ci /* Remap from 0..255 back to 1..256 */ 2139bf215546Sopenharmony_ci const unsigned line_stipple_factor = state->line_stipple_factor + 1; 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) { 2142bf215546Sopenharmony_ci if (state->line_stipple_enable) { 2143bf215546Sopenharmony_ci line.LineStipplePattern = state->line_stipple_pattern; 2144bf215546Sopenharmony_ci line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor; 
2145bf215546Sopenharmony_ci line.LineStippleRepeatCount = line_stipple_factor; 2146bf215546Sopenharmony_ci } 2147bf215546Sopenharmony_ci } 2148bf215546Sopenharmony_ci 2149bf215546Sopenharmony_ci return cso; 2150bf215546Sopenharmony_ci} 2151bf215546Sopenharmony_ci 2152bf215546Sopenharmony_ci/** 2153bf215546Sopenharmony_ci * The pipe->bind_rasterizer_state() driver hook. 2154bf215546Sopenharmony_ci * 2155bf215546Sopenharmony_ci * Bind a rasterizer CSO and flag related dirty bits. 2156bf215546Sopenharmony_ci */ 2157bf215546Sopenharmony_cistatic void 2158bf215546Sopenharmony_cicrocus_bind_rasterizer_state(struct pipe_context *ctx, void *state) 2159bf215546Sopenharmony_ci{ 2160bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 2161bf215546Sopenharmony_ci struct crocus_rasterizer_state *old_cso = ice->state.cso_rast; 2162bf215546Sopenharmony_ci struct crocus_rasterizer_state *new_cso = state; 2163bf215546Sopenharmony_ci 2164bf215546Sopenharmony_ci if (new_cso) { 2165bf215546Sopenharmony_ci /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */ 2166bf215546Sopenharmony_ci if (cso_changed_memcmp(line_stipple)) 2167bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_LINE_STIPPLE; 2168bf215546Sopenharmony_ci#if GFX_VER >= 6 2169bf215546Sopenharmony_ci if (cso_changed(cso.half_pixel_center)) 2170bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE; 2171bf215546Sopenharmony_ci if (cso_changed(cso.scissor)) 2172bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT; 2173bf215546Sopenharmony_ci if (cso_changed(cso.multisample)) 2174bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_WM; 2175bf215546Sopenharmony_ci#else 2176bf215546Sopenharmony_ci if (cso_changed(cso.scissor)) 2177bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT; 2178bf215546Sopenharmony_ci#endif 2179bf215546Sopenharmony_ci 2180bf215546Sopenharmony_ci if (cso_changed(cso.line_stipple_enable) 
|| cso_changed(cso.poly_stipple_enable)) 2181bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_WM; 2182bf215546Sopenharmony_ci 2183bf215546Sopenharmony_ci#if GFX_VER >= 6 2184bf215546Sopenharmony_ci if (cso_changed(cso.rasterizer_discard)) 2185bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP; 2186bf215546Sopenharmony_ci 2187bf215546Sopenharmony_ci if (cso_changed(cso.flatshade_first)) 2188bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_STREAMOUT; 2189bf215546Sopenharmony_ci#endif 2190bf215546Sopenharmony_ci 2191bf215546Sopenharmony_ci if (cso_changed(cso.depth_clip_near) || cso_changed(cso.depth_clip_far) || 2192bf215546Sopenharmony_ci cso_changed(cso.clip_halfz)) 2193bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT; 2194bf215546Sopenharmony_ci 2195bf215546Sopenharmony_ci#if GFX_VER >= 7 2196bf215546Sopenharmony_ci if (cso_changed(cso.sprite_coord_enable) || 2197bf215546Sopenharmony_ci cso_changed(cso.sprite_coord_mode) || 2198bf215546Sopenharmony_ci cso_changed(cso.light_twoside)) 2199bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE; 2200bf215546Sopenharmony_ci#endif 2201bf215546Sopenharmony_ci#if GFX_VER <= 5 2202bf215546Sopenharmony_ci if (cso_changed(cso.clip_plane_enable)) 2203bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE; 2204bf215546Sopenharmony_ci#endif 2205bf215546Sopenharmony_ci } 2206bf215546Sopenharmony_ci 2207bf215546Sopenharmony_ci ice->state.cso_rast = new_cso; 2208bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RASTER; 2209bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_CLIP; 2210bf215546Sopenharmony_ci#if GFX_VER <= 5 2211bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG; 2212bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_WM; 2213bf215546Sopenharmony_ci#endif 2214bf215546Sopenharmony_ci#if GFX_VER <= 6 2215bf215546Sopenharmony_ci ice->state.dirty |= 
CROCUS_DIRTY_GEN4_FF_GS_PROG; 2216bf215546Sopenharmony_ci#endif 2217bf215546Sopenharmony_ci ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_RASTERIZER]; 2218bf215546Sopenharmony_ci} 2219bf215546Sopenharmony_ci 2220bf215546Sopenharmony_ci/** 2221bf215546Sopenharmony_ci * Return true if the given wrap mode requires the border color to exist. 2222bf215546Sopenharmony_ci * 2223bf215546Sopenharmony_ci * (We can skip uploading it if the sampler isn't going to use it.) 2224bf215546Sopenharmony_ci */ 2225bf215546Sopenharmony_cistatic bool 2226bf215546Sopenharmony_ciwrap_mode_needs_border_color(unsigned wrap_mode) 2227bf215546Sopenharmony_ci{ 2228bf215546Sopenharmony_ci#if GFX_VER == 8 2229bf215546Sopenharmony_ci return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER; 2230bf215546Sopenharmony_ci#else 2231bf215546Sopenharmony_ci return wrap_mode == TCM_CLAMP_BORDER; 2232bf215546Sopenharmony_ci#endif 2233bf215546Sopenharmony_ci} 2234bf215546Sopenharmony_ci 2235bf215546Sopenharmony_ci/** 2236bf215546Sopenharmony_ci * Gallium CSO for sampler state. 2237bf215546Sopenharmony_ci */ 2238bf215546Sopenharmony_cistruct crocus_sampler_state { 2239bf215546Sopenharmony_ci struct pipe_sampler_state pstate; 2240bf215546Sopenharmony_ci union pipe_color_union border_color; 2241bf215546Sopenharmony_ci bool needs_border_color; 2242bf215546Sopenharmony_ci unsigned wrap_s; 2243bf215546Sopenharmony_ci unsigned wrap_t; 2244bf215546Sopenharmony_ci unsigned wrap_r; 2245bf215546Sopenharmony_ci unsigned mag_img_filter; 2246bf215546Sopenharmony_ci float min_lod; 2247bf215546Sopenharmony_ci}; 2248bf215546Sopenharmony_ci 2249bf215546Sopenharmony_ci/** 2250bf215546Sopenharmony_ci * The pipe->create_sampler_state() driver hook. 2251bf215546Sopenharmony_ci * 2252bf215546Sopenharmony_ci * We fill out SAMPLER_STATE (except for the border color pointer), and 2253bf215546Sopenharmony_ci * store that on the CPU. 
It doesn't make sense to upload it to a GPU 2254bf215546Sopenharmony_ci * buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires 2255bf215546Sopenharmony_ci * all bound sampler states to be in contiguous memor. 2256bf215546Sopenharmony_ci */ 2257bf215546Sopenharmony_cistatic void * 2258bf215546Sopenharmony_cicrocus_create_sampler_state(struct pipe_context *ctx, 2259bf215546Sopenharmony_ci const struct pipe_sampler_state *state) 2260bf215546Sopenharmony_ci{ 2261bf215546Sopenharmony_ci struct crocus_sampler_state *cso = CALLOC_STRUCT(crocus_sampler_state); 2262bf215546Sopenharmony_ci 2263bf215546Sopenharmony_ci if (!cso) 2264bf215546Sopenharmony_ci return NULL; 2265bf215546Sopenharmony_ci 2266bf215546Sopenharmony_ci STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST); 2267bf215546Sopenharmony_ci STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR); 2268bf215546Sopenharmony_ci 2269bf215546Sopenharmony_ci bool either_nearest = state->min_img_filter == PIPE_TEX_FILTER_NEAREST || 2270bf215546Sopenharmony_ci state->mag_img_filter == PIPE_TEX_FILTER_NEAREST; 2271bf215546Sopenharmony_ci cso->wrap_s = translate_wrap(state->wrap_s, either_nearest); 2272bf215546Sopenharmony_ci cso->wrap_t = translate_wrap(state->wrap_t, either_nearest); 2273bf215546Sopenharmony_ci cso->wrap_r = translate_wrap(state->wrap_r, either_nearest); 2274bf215546Sopenharmony_ci 2275bf215546Sopenharmony_ci cso->pstate = *state; 2276bf215546Sopenharmony_ci 2277bf215546Sopenharmony_ci memcpy(&cso->border_color, &state->border_color, sizeof(cso->border_color)); 2278bf215546Sopenharmony_ci 2279bf215546Sopenharmony_ci cso->needs_border_color = wrap_mode_needs_border_color(cso->wrap_s) || 2280bf215546Sopenharmony_ci wrap_mode_needs_border_color(cso->wrap_t) || 2281bf215546Sopenharmony_ci wrap_mode_needs_border_color(cso->wrap_r); 2282bf215546Sopenharmony_ci 2283bf215546Sopenharmony_ci cso->min_lod = state->min_lod; 2284bf215546Sopenharmony_ci cso->mag_img_filter = 
state->mag_img_filter; 2285bf215546Sopenharmony_ci 2286bf215546Sopenharmony_ci // XXX: explain this code ported from ilo...I don't get it at all... 2287bf215546Sopenharmony_ci if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && 2288bf215546Sopenharmony_ci state->min_lod > 0.0f) { 2289bf215546Sopenharmony_ci cso->min_lod = 0.0f; 2290bf215546Sopenharmony_ci cso->mag_img_filter = state->min_img_filter; 2291bf215546Sopenharmony_ci } 2292bf215546Sopenharmony_ci 2293bf215546Sopenharmony_ci return cso; 2294bf215546Sopenharmony_ci} 2295bf215546Sopenharmony_ci 2296bf215546Sopenharmony_ci/** 2297bf215546Sopenharmony_ci * The pipe->bind_sampler_states() driver hook. 2298bf215546Sopenharmony_ci */ 2299bf215546Sopenharmony_cistatic void 2300bf215546Sopenharmony_cicrocus_bind_sampler_states(struct pipe_context *ctx, 2301bf215546Sopenharmony_ci enum pipe_shader_type p_stage, 2302bf215546Sopenharmony_ci unsigned start, unsigned count, 2303bf215546Sopenharmony_ci void **states) 2304bf215546Sopenharmony_ci{ 2305bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 2306bf215546Sopenharmony_ci gl_shader_stage stage = stage_from_pipe(p_stage); 2307bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 2308bf215546Sopenharmony_ci 2309bf215546Sopenharmony_ci assert(start + count <= CROCUS_MAX_TEXTURE_SAMPLERS); 2310bf215546Sopenharmony_ci 2311bf215546Sopenharmony_ci bool dirty = false; 2312bf215546Sopenharmony_ci 2313bf215546Sopenharmony_ci for (int i = 0; i < count; i++) { 2314bf215546Sopenharmony_ci if (shs->samplers[start + i] != states[i]) { 2315bf215546Sopenharmony_ci shs->samplers[start + i] = states[i]; 2316bf215546Sopenharmony_ci dirty = true; 2317bf215546Sopenharmony_ci } 2318bf215546Sopenharmony_ci } 2319bf215546Sopenharmony_ci 2320bf215546Sopenharmony_ci if (dirty) { 2321bf215546Sopenharmony_ci#if GFX_VER <= 5 2322bf215546Sopenharmony_ci if (p_stage == PIPE_SHADER_FRAGMENT) 2323bf215546Sopenharmony_ci 
ice->state.dirty |= CROCUS_DIRTY_WM; 2324bf215546Sopenharmony_ci else if (p_stage == PIPE_SHADER_VERTEX) 2325bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS; 2326bf215546Sopenharmony_ci#endif 2327bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage; 2328bf215546Sopenharmony_ci ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES]; 2329bf215546Sopenharmony_ci } 2330bf215546Sopenharmony_ci} 2331bf215546Sopenharmony_ci 2332bf215546Sopenharmony_cienum samp_workaround { 2333bf215546Sopenharmony_ci SAMP_NORMAL, 2334bf215546Sopenharmony_ci SAMP_CUBE_CLAMP, 2335bf215546Sopenharmony_ci SAMP_CUBE_CUBE, 2336bf215546Sopenharmony_ci SAMP_T_WRAP, 2337bf215546Sopenharmony_ci}; 2338bf215546Sopenharmony_ci 2339bf215546Sopenharmony_cistatic void 2340bf215546Sopenharmony_cicrocus_upload_sampler_state(struct crocus_batch *batch, 2341bf215546Sopenharmony_ci struct crocus_sampler_state *cso, 2342bf215546Sopenharmony_ci uint32_t border_color_offset, 2343bf215546Sopenharmony_ci enum samp_workaround samp_workaround, 2344bf215546Sopenharmony_ci uint32_t first_level, 2345bf215546Sopenharmony_ci void *map) 2346bf215546Sopenharmony_ci{ 2347bf215546Sopenharmony_ci struct pipe_sampler_state *state = &cso->pstate; 2348bf215546Sopenharmony_ci uint32_t wrap_s, wrap_t, wrap_r; 2349bf215546Sopenharmony_ci 2350bf215546Sopenharmony_ci wrap_s = cso->wrap_s; 2351bf215546Sopenharmony_ci wrap_t = cso->wrap_t; 2352bf215546Sopenharmony_ci wrap_r = cso->wrap_r; 2353bf215546Sopenharmony_ci 2354bf215546Sopenharmony_ci switch (samp_workaround) { 2355bf215546Sopenharmony_ci case SAMP_CUBE_CLAMP: 2356bf215546Sopenharmony_ci wrap_s = TCM_CLAMP; 2357bf215546Sopenharmony_ci wrap_t = TCM_CLAMP; 2358bf215546Sopenharmony_ci wrap_r = TCM_CLAMP; 2359bf215546Sopenharmony_ci break; 2360bf215546Sopenharmony_ci case SAMP_CUBE_CUBE: 2361bf215546Sopenharmony_ci wrap_s = TCM_CUBE; 2362bf215546Sopenharmony_ci wrap_t = TCM_CUBE; 
2363bf215546Sopenharmony_ci wrap_r = TCM_CUBE; 2364bf215546Sopenharmony_ci break; 2365bf215546Sopenharmony_ci case SAMP_T_WRAP: 2366bf215546Sopenharmony_ci wrap_t = TCM_WRAP; 2367bf215546Sopenharmony_ci break; 2368bf215546Sopenharmony_ci default: 2369bf215546Sopenharmony_ci break; 2370bf215546Sopenharmony_ci } 2371bf215546Sopenharmony_ci 2372bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(SAMPLER_STATE), map, samp) { 2373bf215546Sopenharmony_ci samp.TCXAddressControlMode = wrap_s; 2374bf215546Sopenharmony_ci samp.TCYAddressControlMode = wrap_t; 2375bf215546Sopenharmony_ci samp.TCZAddressControlMode = wrap_r; 2376bf215546Sopenharmony_ci 2377bf215546Sopenharmony_ci#if GFX_VER >= 6 2378bf215546Sopenharmony_ci samp.NonnormalizedCoordinateEnable = !state->normalized_coords; 2379bf215546Sopenharmony_ci#endif 2380bf215546Sopenharmony_ci samp.MinModeFilter = state->min_img_filter; 2381bf215546Sopenharmony_ci samp.MagModeFilter = cso->mag_img_filter; 2382bf215546Sopenharmony_ci samp.MipModeFilter = translate_mip_filter(state->min_mip_filter); 2383bf215546Sopenharmony_ci samp.MaximumAnisotropy = RATIO21; 2384bf215546Sopenharmony_ci 2385bf215546Sopenharmony_ci if (state->max_anisotropy >= 2) { 2386bf215546Sopenharmony_ci if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) { 2387bf215546Sopenharmony_ci samp.MinModeFilter = MAPFILTER_ANISOTROPIC; 2388bf215546Sopenharmony_ci#if GFX_VER >= 7 2389bf215546Sopenharmony_ci samp.AnisotropicAlgorithm = EWAApproximation; 2390bf215546Sopenharmony_ci#endif 2391bf215546Sopenharmony_ci } 2392bf215546Sopenharmony_ci 2393bf215546Sopenharmony_ci if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR) 2394bf215546Sopenharmony_ci samp.MagModeFilter = MAPFILTER_ANISOTROPIC; 2395bf215546Sopenharmony_ci 2396bf215546Sopenharmony_ci samp.MaximumAnisotropy = 2397bf215546Sopenharmony_ci MIN2((state->max_anisotropy - 2) / 2, RATIO161); 2398bf215546Sopenharmony_ci } 2399bf215546Sopenharmony_ci 2400bf215546Sopenharmony_ci /* Set address rounding bits 
if not using nearest filtering. */ 2401bf215546Sopenharmony_ci if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) { 2402bf215546Sopenharmony_ci samp.UAddressMinFilterRoundingEnable = true; 2403bf215546Sopenharmony_ci samp.VAddressMinFilterRoundingEnable = true; 2404bf215546Sopenharmony_ci samp.RAddressMinFilterRoundingEnable = true; 2405bf215546Sopenharmony_ci } 2406bf215546Sopenharmony_ci 2407bf215546Sopenharmony_ci if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) { 2408bf215546Sopenharmony_ci samp.UAddressMagFilterRoundingEnable = true; 2409bf215546Sopenharmony_ci samp.VAddressMagFilterRoundingEnable = true; 2410bf215546Sopenharmony_ci samp.RAddressMagFilterRoundingEnable = true; 2411bf215546Sopenharmony_ci } 2412bf215546Sopenharmony_ci 2413bf215546Sopenharmony_ci if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) 2414bf215546Sopenharmony_ci samp.ShadowFunction = translate_shadow_func(state->compare_func); 2415bf215546Sopenharmony_ci 2416bf215546Sopenharmony_ci const float hw_max_lod = GFX_VER >= 7 ? 
14 : 13; 2417bf215546Sopenharmony_ci 2418bf215546Sopenharmony_ci#if GFX_VER == 8 2419bf215546Sopenharmony_ci samp.LODPreClampMode = CLAMP_MODE_OGL; 2420bf215546Sopenharmony_ci#else 2421bf215546Sopenharmony_ci samp.LODPreClampEnable = true; 2422bf215546Sopenharmony_ci#endif 2423bf215546Sopenharmony_ci samp.MinLOD = CLAMP(cso->min_lod, 0, hw_max_lod); 2424bf215546Sopenharmony_ci samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod); 2425bf215546Sopenharmony_ci samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15); 2426bf215546Sopenharmony_ci 2427bf215546Sopenharmony_ci#if GFX_VER == 6 2428bf215546Sopenharmony_ci samp.BaseMipLevel = CLAMP(first_level, 0, hw_max_lod); 2429bf215546Sopenharmony_ci samp.MinandMagStateNotEqual = samp.MinModeFilter != samp.MagModeFilter; 2430bf215546Sopenharmony_ci#endif 2431bf215546Sopenharmony_ci 2432bf215546Sopenharmony_ci#if GFX_VER < 6 2433bf215546Sopenharmony_ci samp.BorderColorPointer = 2434bf215546Sopenharmony_ci ro_bo(batch->state.bo, border_color_offset); 2435bf215546Sopenharmony_ci#else 2436bf215546Sopenharmony_ci samp.BorderColorPointer = border_color_offset; 2437bf215546Sopenharmony_ci#endif 2438bf215546Sopenharmony_ci } 2439bf215546Sopenharmony_ci} 2440bf215546Sopenharmony_ci 2441bf215546Sopenharmony_cistatic void 2442bf215546Sopenharmony_cicrocus_upload_border_color(struct crocus_batch *batch, 2443bf215546Sopenharmony_ci struct crocus_sampler_state *cso, 2444bf215546Sopenharmony_ci struct crocus_sampler_view *tex, 2445bf215546Sopenharmony_ci uint32_t *bc_offset) 2446bf215546Sopenharmony_ci{ 2447bf215546Sopenharmony_ci /* We may need to swizzle the border color for format faking. 2448bf215546Sopenharmony_ci * A/LA formats are faked as R/RG with 000R or R00G swizzles. 2449bf215546Sopenharmony_ci * This means we need to move the border color's A channel into 2450bf215546Sopenharmony_ci * the R or G channels so that those read swizzles will move it 2451bf215546Sopenharmony_ci * back into A. 
2452bf215546Sopenharmony_ci */ 2453bf215546Sopenharmony_ci enum pipe_format internal_format = PIPE_FORMAT_NONE; 2454bf215546Sopenharmony_ci union pipe_color_union *color = &cso->border_color; 2455bf215546Sopenharmony_ci union pipe_color_union tmp; 2456bf215546Sopenharmony_ci if (tex) { 2457bf215546Sopenharmony_ci internal_format = tex->res->internal_format; 2458bf215546Sopenharmony_ci 2459bf215546Sopenharmony_ci if (util_format_is_alpha(internal_format)) { 2460bf215546Sopenharmony_ci unsigned char swz[4] = { 2461bf215546Sopenharmony_ci PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, 2462bf215546Sopenharmony_ci PIPE_SWIZZLE_0, PIPE_SWIZZLE_W, 2463bf215546Sopenharmony_ci }; 2464bf215546Sopenharmony_ci util_format_apply_color_swizzle(&tmp, color, swz, true); 2465bf215546Sopenharmony_ci color = &tmp; 2466bf215546Sopenharmony_ci } else if (util_format_is_luminance_alpha(internal_format) && 2467bf215546Sopenharmony_ci internal_format != PIPE_FORMAT_L8A8_SRGB) { 2468bf215546Sopenharmony_ci unsigned char swz[4] = { 2469bf215546Sopenharmony_ci PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, 2470bf215546Sopenharmony_ci PIPE_SWIZZLE_X, PIPE_SWIZZLE_W 2471bf215546Sopenharmony_ci }; 2472bf215546Sopenharmony_ci util_format_apply_color_swizzle(&tmp, color, swz, true); 2473bf215546Sopenharmony_ci color = &tmp; 2474bf215546Sopenharmony_ci } 2475bf215546Sopenharmony_ci } 2476bf215546Sopenharmony_ci bool is_integer_format = util_format_is_pure_integer(internal_format); 2477bf215546Sopenharmony_ci unsigned sbc_size = GENX(SAMPLER_BORDER_COLOR_STATE_length) * 4; 2478bf215546Sopenharmony_ci const int sbc_align = (GFX_VER == 8 ? 64 : ((GFX_VERx10 == 75 && is_integer_format) ? 
512 : 32)); 2479bf215546Sopenharmony_ci uint32_t *sbc = stream_state(batch, sbc_size, sbc_align, bc_offset); 2480bf215546Sopenharmony_ci 2481bf215546Sopenharmony_ci struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 }; 2482bf215546Sopenharmony_ci 2483bf215546Sopenharmony_ci#define ASSIGN(dst, src) \ 2484bf215546Sopenharmony_ci do { \ 2485bf215546Sopenharmony_ci dst = src; \ 2486bf215546Sopenharmony_ci } while (0) 2487bf215546Sopenharmony_ci 2488bf215546Sopenharmony_ci#define ASSIGNu16(dst, src) \ 2489bf215546Sopenharmony_ci do { \ 2490bf215546Sopenharmony_ci dst = (uint16_t)src; \ 2491bf215546Sopenharmony_ci } while (0) 2492bf215546Sopenharmony_ci 2493bf215546Sopenharmony_ci#define ASSIGNu8(dst, src) \ 2494bf215546Sopenharmony_ci do { \ 2495bf215546Sopenharmony_ci dst = (uint8_t)src; \ 2496bf215546Sopenharmony_ci } while (0) 2497bf215546Sopenharmony_ci 2498bf215546Sopenharmony_ci#define BORDER_COLOR_ATTR(macro, _color_type, src) \ 2499bf215546Sopenharmony_ci macro(state.BorderColor ## _color_type ## Red, src[0]); \ 2500bf215546Sopenharmony_ci macro(state.BorderColor ## _color_type ## Green, src[1]); \ 2501bf215546Sopenharmony_ci macro(state.BorderColor ## _color_type ## Blue, src[2]); \ 2502bf215546Sopenharmony_ci macro(state.BorderColor ## _color_type ## Alpha, src[3]); 2503bf215546Sopenharmony_ci 2504bf215546Sopenharmony_ci#if GFX_VER >= 8 2505bf215546Sopenharmony_ci /* On Broadwell, the border color is represented as four 32-bit floats, 2506bf215546Sopenharmony_ci * integers, or unsigned values, interpreted according to the surface 2507bf215546Sopenharmony_ci * format. This matches the sampler->BorderColor union exactly; just 2508bf215546Sopenharmony_ci * memcpy the values. 
2509bf215546Sopenharmony_ci */ 2510bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGN, 32bit, color->ui); 2511bf215546Sopenharmony_ci#elif GFX_VERx10 == 75 2512bf215546Sopenharmony_ci if (is_integer_format) { 2513bf215546Sopenharmony_ci const struct util_format_description *format_desc = 2514bf215546Sopenharmony_ci util_format_description(internal_format); 2515bf215546Sopenharmony_ci 2516bf215546Sopenharmony_ci /* From the Haswell PRM, "Command Reference: Structures", Page 36: 2517bf215546Sopenharmony_ci * "If any color channel is missing from the surface format, 2518bf215546Sopenharmony_ci * corresponding border color should be programmed as zero and if 2519bf215546Sopenharmony_ci * alpha channel is missing, corresponding Alpha border color should 2520bf215546Sopenharmony_ci * be programmed as 1." 2521bf215546Sopenharmony_ci */ 2522bf215546Sopenharmony_ci unsigned c[4] = { 0, 0, 0, 1 }; 2523bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 2524bf215546Sopenharmony_ci if (format_desc->channel[i].size) 2525bf215546Sopenharmony_ci c[i] = color->ui[i]; 2526bf215546Sopenharmony_ci } 2527bf215546Sopenharmony_ci 2528bf215546Sopenharmony_ci switch (format_desc->channel[0].size) { 2529bf215546Sopenharmony_ci case 8: 2530bf215546Sopenharmony_ci /* Copy RGBA in order. */ 2531bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c); 2532bf215546Sopenharmony_ci break; 2533bf215546Sopenharmony_ci case 10: 2534bf215546Sopenharmony_ci /* R10G10B10A2_UINT is treated like a 16-bit format. */ 2535bf215546Sopenharmony_ci case 16: 2536bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c); 2537bf215546Sopenharmony_ci break; 2538bf215546Sopenharmony_ci case 32: 2539bf215546Sopenharmony_ci if (format_desc->channel[1].size && !format_desc->channel[2].size) { 2540bf215546Sopenharmony_ci /* Careful inspection of the tables reveals that for RG32 formats, 2541bf215546Sopenharmony_ci * the green channel needs to go where blue normally belongs. 
2542bf215546Sopenharmony_ci */ 2543bf215546Sopenharmony_ci state.BorderColor32bitRed = c[0]; 2544bf215546Sopenharmony_ci state.BorderColor32bitBlue = c[1]; 2545bf215546Sopenharmony_ci state.BorderColor32bitAlpha = 1; 2546bf215546Sopenharmony_ci } else { 2547bf215546Sopenharmony_ci /* Copy RGBA in order. */ 2548bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGN, 32bit, c); 2549bf215546Sopenharmony_ci } 2550bf215546Sopenharmony_ci break; 2551bf215546Sopenharmony_ci default: 2552bf215546Sopenharmony_ci assert(!"Invalid number of bits per channel in integer format."); 2553bf215546Sopenharmony_ci break; 2554bf215546Sopenharmony_ci } 2555bf215546Sopenharmony_ci } else { 2556bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGN, Float, color->f); 2557bf215546Sopenharmony_ci } 2558bf215546Sopenharmony_ci#elif GFX_VER == 5 || GFX_VER == 6 2559bf215546Sopenharmony_ci BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color->f); 2560bf215546Sopenharmony_ci BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color->f); 2561bf215546Sopenharmony_ci BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color->f); 2562bf215546Sopenharmony_ci 2563bf215546Sopenharmony_ci#define MESA_FLOAT_TO_HALF(dst, src) \ 2564bf215546Sopenharmony_ci dst = _mesa_float_to_half(src); 2565bf215546Sopenharmony_ci 2566bf215546Sopenharmony_ci BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color->f); 2567bf215546Sopenharmony_ci 2568bf215546Sopenharmony_ci#undef MESA_FLOAT_TO_HALF 2569bf215546Sopenharmony_ci 2570bf215546Sopenharmony_ci state.BorderColorSnorm8Red = state.BorderColorSnorm16Red >> 8; 2571bf215546Sopenharmony_ci state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8; 2572bf215546Sopenharmony_ci state.BorderColorSnorm8Blue = state.BorderColorSnorm16Blue >> 8; 2573bf215546Sopenharmony_ci state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8; 2574bf215546Sopenharmony_ci 2575bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGN, Float, color->f); 2576bf215546Sopenharmony_ci 
2577bf215546Sopenharmony_ci#elif GFX_VER == 4 2578bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGN, , color->f); 2579bf215546Sopenharmony_ci#else 2580bf215546Sopenharmony_ci BORDER_COLOR_ATTR(ASSIGN, Float, color->f); 2581bf215546Sopenharmony_ci#endif 2582bf215546Sopenharmony_ci 2583bf215546Sopenharmony_ci#undef ASSIGN 2584bf215546Sopenharmony_ci#undef BORDER_COLOR_ATTR 2585bf215546Sopenharmony_ci 2586bf215546Sopenharmony_ci GENX(SAMPLER_BORDER_COLOR_STATE_pack)(batch, sbc, &state); 2587bf215546Sopenharmony_ci} 2588bf215546Sopenharmony_ci 2589bf215546Sopenharmony_ci/** 2590bf215546Sopenharmony_ci * Upload the sampler states into a contiguous area of GPU memory, for 2591bf215546Sopenharmony_ci * for 3DSTATE_SAMPLER_STATE_POINTERS_*. 2592bf215546Sopenharmony_ci * 2593bf215546Sopenharmony_ci * Also fill out the border color state pointers. 2594bf215546Sopenharmony_ci */ 2595bf215546Sopenharmony_cistatic void 2596bf215546Sopenharmony_cicrocus_upload_sampler_states(struct crocus_context *ice, 2597bf215546Sopenharmony_ci struct crocus_batch *batch, gl_shader_stage stage) 2598bf215546Sopenharmony_ci{ 2599bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 2600bf215546Sopenharmony_ci const struct shader_info *info = crocus_get_shader_info(ice, stage); 2601bf215546Sopenharmony_ci 2602bf215546Sopenharmony_ci /* We assume the state tracker will call pipe->bind_sampler_states() 2603bf215546Sopenharmony_ci * if the program's number of textures changes. 2604bf215546Sopenharmony_ci */ 2605bf215546Sopenharmony_ci unsigned count = info ? 
BITSET_LAST_BIT(info->textures_used) : 0; 2606bf215546Sopenharmony_ci 2607bf215546Sopenharmony_ci if (!count) 2608bf215546Sopenharmony_ci return; 2609bf215546Sopenharmony_ci 2610bf215546Sopenharmony_ci /* Assemble the SAMPLER_STATEs into a contiguous table that lives 2611bf215546Sopenharmony_ci * in the dynamic state memory zone, so we can point to it via the 2612bf215546Sopenharmony_ci * 3DSTATE_SAMPLER_STATE_POINTERS_* commands. 2613bf215546Sopenharmony_ci */ 2614bf215546Sopenharmony_ci unsigned size = count * 4 * GENX(SAMPLER_STATE_length); 2615bf215546Sopenharmony_ci uint32_t *map = stream_state(batch, size, 32, &shs->sampler_offset); 2616bf215546Sopenharmony_ci 2617bf215546Sopenharmony_ci if (unlikely(!map)) 2618bf215546Sopenharmony_ci return; 2619bf215546Sopenharmony_ci 2620bf215546Sopenharmony_ci for (int i = 0; i < count; i++) { 2621bf215546Sopenharmony_ci struct crocus_sampler_state *state = shs->samplers[i]; 2622bf215546Sopenharmony_ci struct crocus_sampler_view *tex = shs->textures[i]; 2623bf215546Sopenharmony_ci 2624bf215546Sopenharmony_ci if (!state || !tex) { 2625bf215546Sopenharmony_ci memset(map, 0, 4 * GENX(SAMPLER_STATE_length)); 2626bf215546Sopenharmony_ci } else { 2627bf215546Sopenharmony_ci unsigned border_color_offset = 0; 2628bf215546Sopenharmony_ci if (state->needs_border_color) { 2629bf215546Sopenharmony_ci crocus_upload_border_color(batch, state, tex, &border_color_offset); 2630bf215546Sopenharmony_ci } 2631bf215546Sopenharmony_ci 2632bf215546Sopenharmony_ci enum samp_workaround wa = SAMP_NORMAL; 2633bf215546Sopenharmony_ci /* There's a bug in 1D texture sampling - it actually pays 2634bf215546Sopenharmony_ci * attention to the wrap_t value, though it should not. 2635bf215546Sopenharmony_ci * Override the wrap_t value here to GL_REPEAT to keep 2636bf215546Sopenharmony_ci * any nonexistent border pixels from floating in. 
2637bf215546Sopenharmony_ci */ 2638bf215546Sopenharmony_ci if (tex->base.target == PIPE_TEXTURE_1D) 2639bf215546Sopenharmony_ci wa = SAMP_T_WRAP; 2640bf215546Sopenharmony_ci else if (tex->base.target == PIPE_TEXTURE_CUBE || 2641bf215546Sopenharmony_ci tex->base.target == PIPE_TEXTURE_CUBE_ARRAY) { 2642bf215546Sopenharmony_ci /* Cube maps must use the same wrap mode for all three coordinate 2643bf215546Sopenharmony_ci * dimensions. Prior to Haswell, only CUBE and CLAMP are valid. 2644bf215546Sopenharmony_ci * 2645bf215546Sopenharmony_ci * Ivybridge and Baytrail seem to have problems with CUBE mode and 2646bf215546Sopenharmony_ci * integer formats. Fall back to CLAMP for now. 2647bf215546Sopenharmony_ci */ 2648bf215546Sopenharmony_ci if (state->pstate.seamless_cube_map && 2649bf215546Sopenharmony_ci !(GFX_VERx10 == 70 && util_format_is_pure_integer(tex->base.format))) 2650bf215546Sopenharmony_ci wa = SAMP_CUBE_CUBE; 2651bf215546Sopenharmony_ci else 2652bf215546Sopenharmony_ci wa = SAMP_CUBE_CLAMP; 2653bf215546Sopenharmony_ci } 2654bf215546Sopenharmony_ci 2655bf215546Sopenharmony_ci uint32_t first_level = 0; 2656bf215546Sopenharmony_ci if (tex->base.target != PIPE_BUFFER) 2657bf215546Sopenharmony_ci first_level = tex->base.u.tex.first_level; 2658bf215546Sopenharmony_ci 2659bf215546Sopenharmony_ci crocus_upload_sampler_state(batch, state, border_color_offset, wa, first_level, map); 2660bf215546Sopenharmony_ci } 2661bf215546Sopenharmony_ci 2662bf215546Sopenharmony_ci map += GENX(SAMPLER_STATE_length); 2663bf215546Sopenharmony_ci } 2664bf215546Sopenharmony_ci} 2665bf215546Sopenharmony_ci 2666bf215546Sopenharmony_ci/** 2667bf215546Sopenharmony_ci * The pipe->create_sampler_view() driver hook. 
2668bf215546Sopenharmony_ci */ 2669bf215546Sopenharmony_cistatic struct pipe_sampler_view * 2670bf215546Sopenharmony_cicrocus_create_sampler_view(struct pipe_context *ctx, 2671bf215546Sopenharmony_ci struct pipe_resource *tex, 2672bf215546Sopenharmony_ci const struct pipe_sampler_view *tmpl) 2673bf215546Sopenharmony_ci{ 2674bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 2675bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &screen->devinfo; 2676bf215546Sopenharmony_ci struct crocus_sampler_view *isv = calloc(1, sizeof(struct crocus_sampler_view)); 2677bf215546Sopenharmony_ci 2678bf215546Sopenharmony_ci if (!isv) 2679bf215546Sopenharmony_ci return NULL; 2680bf215546Sopenharmony_ci 2681bf215546Sopenharmony_ci /* initialize base object */ 2682bf215546Sopenharmony_ci isv->base = *tmpl; 2683bf215546Sopenharmony_ci isv->base.context = ctx; 2684bf215546Sopenharmony_ci isv->base.texture = NULL; 2685bf215546Sopenharmony_ci pipe_reference_init(&isv->base.reference, 1); 2686bf215546Sopenharmony_ci pipe_resource_reference(&isv->base.texture, tex); 2687bf215546Sopenharmony_ci 2688bf215546Sopenharmony_ci if (util_format_is_depth_or_stencil(tmpl->format)) { 2689bf215546Sopenharmony_ci struct crocus_resource *zres, *sres; 2690bf215546Sopenharmony_ci const struct util_format_description *desc = 2691bf215546Sopenharmony_ci util_format_description(tmpl->format); 2692bf215546Sopenharmony_ci 2693bf215546Sopenharmony_ci crocus_get_depth_stencil_resources(devinfo, tex, &zres, &sres); 2694bf215546Sopenharmony_ci 2695bf215546Sopenharmony_ci tex = util_format_has_depth(desc) ? 
&zres->base.b : &sres->base.b; 2696bf215546Sopenharmony_ci 2697bf215546Sopenharmony_ci if (tex->format == PIPE_FORMAT_S8_UINT) 2698bf215546Sopenharmony_ci if (GFX_VER == 7 && sres->shadow) 2699bf215546Sopenharmony_ci tex = &sres->shadow->base.b; 2700bf215546Sopenharmony_ci } 2701bf215546Sopenharmony_ci 2702bf215546Sopenharmony_ci isv->res = (struct crocus_resource *) tex; 2703bf215546Sopenharmony_ci 2704bf215546Sopenharmony_ci isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT; 2705bf215546Sopenharmony_ci 2706bf215546Sopenharmony_ci if (isv->base.target == PIPE_TEXTURE_CUBE || 2707bf215546Sopenharmony_ci isv->base.target == PIPE_TEXTURE_CUBE_ARRAY) 2708bf215546Sopenharmony_ci usage |= ISL_SURF_USAGE_CUBE_BIT; 2709bf215546Sopenharmony_ci 2710bf215546Sopenharmony_ci const struct crocus_format_info fmt = 2711bf215546Sopenharmony_ci crocus_format_for_usage(devinfo, tmpl->format, usage); 2712bf215546Sopenharmony_ci 2713bf215546Sopenharmony_ci enum pipe_swizzle vswz[4] = { tmpl->swizzle_r, tmpl->swizzle_g, tmpl->swizzle_b, tmpl->swizzle_a }; 2714bf215546Sopenharmony_ci crocus_combine_swizzle(isv->swizzle, fmt.swizzles, vswz); 2715bf215546Sopenharmony_ci 2716bf215546Sopenharmony_ci /* hardcode stencil swizzles - hw returns 0G01, we want GGGG */ 2717bf215546Sopenharmony_ci if (GFX_VER < 6 && 2718bf215546Sopenharmony_ci (tmpl->format == PIPE_FORMAT_X32_S8X24_UINT || 2719bf215546Sopenharmony_ci tmpl->format == PIPE_FORMAT_X24S8_UINT)) { 2720bf215546Sopenharmony_ci isv->swizzle[0] = tmpl->swizzle_g; 2721bf215546Sopenharmony_ci isv->swizzle[1] = tmpl->swizzle_g; 2722bf215546Sopenharmony_ci isv->swizzle[2] = tmpl->swizzle_g; 2723bf215546Sopenharmony_ci isv->swizzle[3] = tmpl->swizzle_g; 2724bf215546Sopenharmony_ci } 2725bf215546Sopenharmony_ci 2726bf215546Sopenharmony_ci isv->clear_color = isv->res->aux.clear_color; 2727bf215546Sopenharmony_ci 2728bf215546Sopenharmony_ci isv->view = (struct isl_view) { 2729bf215546Sopenharmony_ci .format = fmt.fmt, 
2730bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 2731bf215546Sopenharmony_ci .swizzle = (struct isl_swizzle) { 2732bf215546Sopenharmony_ci .r = pipe_to_isl_swizzle(isv->swizzle[0], false), 2733bf215546Sopenharmony_ci .g = pipe_to_isl_swizzle(isv->swizzle[1], false), 2734bf215546Sopenharmony_ci .b = pipe_to_isl_swizzle(isv->swizzle[2], false), 2735bf215546Sopenharmony_ci .a = pipe_to_isl_swizzle(isv->swizzle[3], false), 2736bf215546Sopenharmony_ci }, 2737bf215546Sopenharmony_ci#else 2738bf215546Sopenharmony_ci /* swizzling handled in shader code */ 2739bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 2740bf215546Sopenharmony_ci#endif 2741bf215546Sopenharmony_ci .usage = usage, 2742bf215546Sopenharmony_ci }; 2743bf215546Sopenharmony_ci 2744bf215546Sopenharmony_ci /* Fill out SURFACE_STATE for this view. */ 2745bf215546Sopenharmony_ci if (tmpl->target != PIPE_BUFFER) { 2746bf215546Sopenharmony_ci isv->view.base_level = tmpl->u.tex.first_level; 2747bf215546Sopenharmony_ci isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1; 2748bf215546Sopenharmony_ci 2749bf215546Sopenharmony_ci /* Hardware older than skylake ignores this value */ 2750bf215546Sopenharmony_ci assert(tex->target != PIPE_TEXTURE_3D || !tmpl->u.tex.first_layer); 2751bf215546Sopenharmony_ci 2752bf215546Sopenharmony_ci // XXX: do I need to port f9fd0cf4790cb2a530e75d1a2206dbb9d8af7cb2? 
2753bf215546Sopenharmony_ci isv->view.base_array_layer = tmpl->u.tex.first_layer; 2754bf215546Sopenharmony_ci isv->view.array_len = 2755bf215546Sopenharmony_ci tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1; 2756bf215546Sopenharmony_ci } 2757bf215546Sopenharmony_ci#if GFX_VER >= 6 2758bf215546Sopenharmony_ci /* just create a second view struct for texture gather just in case */ 2759bf215546Sopenharmony_ci isv->gather_view = isv->view; 2760bf215546Sopenharmony_ci 2761bf215546Sopenharmony_ci#if GFX_VER == 7 2762bf215546Sopenharmony_ci if (fmt.fmt == ISL_FORMAT_R32G32_FLOAT || 2763bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_R32G32_SINT || 2764bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_R32G32_UINT) { 2765bf215546Sopenharmony_ci isv->gather_view.format = ISL_FORMAT_R32G32_FLOAT_LD; 2766bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 2767bf215546Sopenharmony_ci isv->gather_view.swizzle = (struct isl_swizzle) { 2768bf215546Sopenharmony_ci .r = pipe_to_isl_swizzle(isv->swizzle[0], GFX_VERx10 == 75), 2769bf215546Sopenharmony_ci .g = pipe_to_isl_swizzle(isv->swizzle[1], GFX_VERx10 == 75), 2770bf215546Sopenharmony_ci .b = pipe_to_isl_swizzle(isv->swizzle[2], GFX_VERx10 == 75), 2771bf215546Sopenharmony_ci .a = pipe_to_isl_swizzle(isv->swizzle[3], GFX_VERx10 == 75), 2772bf215546Sopenharmony_ci }; 2773bf215546Sopenharmony_ci#endif 2774bf215546Sopenharmony_ci } 2775bf215546Sopenharmony_ci#endif 2776bf215546Sopenharmony_ci#if GFX_VER == 6 2777bf215546Sopenharmony_ci /* Sandybridge's gather4 message is broken for integer formats. 2778bf215546Sopenharmony_ci * To work around this, we pretend the surface is UNORM for 2779bf215546Sopenharmony_ci * 8 or 16-bit formats, and emit shader instructions to recover 2780bf215546Sopenharmony_ci * the real INT/UINT value. For 32-bit formats, we pretend 2781bf215546Sopenharmony_ci * the surface is FLOAT, and simply reinterpret the resulting 2782bf215546Sopenharmony_ci * bits. 
2783bf215546Sopenharmony_ci */ 2784bf215546Sopenharmony_ci switch (fmt.fmt) { 2785bf215546Sopenharmony_ci case ISL_FORMAT_R8_SINT: 2786bf215546Sopenharmony_ci case ISL_FORMAT_R8_UINT: 2787bf215546Sopenharmony_ci isv->gather_view.format = ISL_FORMAT_R8_UNORM; 2788bf215546Sopenharmony_ci break; 2789bf215546Sopenharmony_ci 2790bf215546Sopenharmony_ci case ISL_FORMAT_R16_SINT: 2791bf215546Sopenharmony_ci case ISL_FORMAT_R16_UINT: 2792bf215546Sopenharmony_ci isv->gather_view.format = ISL_FORMAT_R16_UNORM; 2793bf215546Sopenharmony_ci break; 2794bf215546Sopenharmony_ci 2795bf215546Sopenharmony_ci case ISL_FORMAT_R32_SINT: 2796bf215546Sopenharmony_ci case ISL_FORMAT_R32_UINT: 2797bf215546Sopenharmony_ci isv->gather_view.format = ISL_FORMAT_R32_FLOAT; 2798bf215546Sopenharmony_ci break; 2799bf215546Sopenharmony_ci 2800bf215546Sopenharmony_ci default: 2801bf215546Sopenharmony_ci break; 2802bf215546Sopenharmony_ci } 2803bf215546Sopenharmony_ci#endif 2804bf215546Sopenharmony_ci#endif 2805bf215546Sopenharmony_ci /* Fill out SURFACE_STATE for this view. 
*/ 2806bf215546Sopenharmony_ci if (tmpl->target != PIPE_BUFFER) { 2807bf215546Sopenharmony_ci if (crocus_resource_unfinished_aux_import(isv->res)) 2808bf215546Sopenharmony_ci crocus_resource_finish_aux_import(&screen->base, isv->res); 2809bf215546Sopenharmony_ci 2810bf215546Sopenharmony_ci } 2811bf215546Sopenharmony_ci 2812bf215546Sopenharmony_ci return &isv->base; 2813bf215546Sopenharmony_ci} 2814bf215546Sopenharmony_ci 2815bf215546Sopenharmony_cistatic void 2816bf215546Sopenharmony_cicrocus_sampler_view_destroy(struct pipe_context *ctx, 2817bf215546Sopenharmony_ci struct pipe_sampler_view *state) 2818bf215546Sopenharmony_ci{ 2819bf215546Sopenharmony_ci struct crocus_sampler_view *isv = (void *) state; 2820bf215546Sopenharmony_ci pipe_resource_reference(&state->texture, NULL); 2821bf215546Sopenharmony_ci free(isv); 2822bf215546Sopenharmony_ci} 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_ci/** 2825bf215546Sopenharmony_ci * The pipe->create_surface() driver hook. 2826bf215546Sopenharmony_ci * 2827bf215546Sopenharmony_ci * In Gallium nomenclature, "surfaces" are a view of a resource that 2828bf215546Sopenharmony_ci * can be bound as a render target or depth/stencil buffer. 
2829bf215546Sopenharmony_ci */ 2830bf215546Sopenharmony_cistatic struct pipe_surface * 2831bf215546Sopenharmony_cicrocus_create_surface(struct pipe_context *ctx, 2832bf215546Sopenharmony_ci struct pipe_resource *tex, 2833bf215546Sopenharmony_ci const struct pipe_surface *tmpl) 2834bf215546Sopenharmony_ci{ 2835bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 2836bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &screen->devinfo; 2837bf215546Sopenharmony_ci 2838bf215546Sopenharmony_ci isl_surf_usage_flags_t usage = 0; 2839bf215546Sopenharmony_ci if (tmpl->writable) 2840bf215546Sopenharmony_ci usage = ISL_SURF_USAGE_STORAGE_BIT; 2841bf215546Sopenharmony_ci else if (util_format_is_depth_or_stencil(tmpl->format)) 2842bf215546Sopenharmony_ci usage = ISL_SURF_USAGE_DEPTH_BIT; 2843bf215546Sopenharmony_ci else 2844bf215546Sopenharmony_ci usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; 2845bf215546Sopenharmony_ci 2846bf215546Sopenharmony_ci const struct crocus_format_info fmt = 2847bf215546Sopenharmony_ci crocus_format_for_usage(devinfo, tmpl->format, usage); 2848bf215546Sopenharmony_ci 2849bf215546Sopenharmony_ci if ((usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && 2850bf215546Sopenharmony_ci !isl_format_supports_rendering(devinfo, fmt.fmt)) { 2851bf215546Sopenharmony_ci /* Framebuffer validation will reject this invalid case, but it 2852bf215546Sopenharmony_ci * hasn't had the opportunity yet. In the meantime, we need to 2853bf215546Sopenharmony_ci * avoid hitting ISL asserts about unsupported formats below. 
2854bf215546Sopenharmony_ci */ 2855bf215546Sopenharmony_ci return NULL; 2856bf215546Sopenharmony_ci } 2857bf215546Sopenharmony_ci 2858bf215546Sopenharmony_ci struct crocus_surface *surf = calloc(1, sizeof(struct crocus_surface)); 2859bf215546Sopenharmony_ci struct pipe_surface *psurf = &surf->base; 2860bf215546Sopenharmony_ci struct crocus_resource *res = (struct crocus_resource *) tex; 2861bf215546Sopenharmony_ci 2862bf215546Sopenharmony_ci if (!surf) 2863bf215546Sopenharmony_ci return NULL; 2864bf215546Sopenharmony_ci 2865bf215546Sopenharmony_ci pipe_reference_init(&psurf->reference, 1); 2866bf215546Sopenharmony_ci pipe_resource_reference(&psurf->texture, tex); 2867bf215546Sopenharmony_ci psurf->context = ctx; 2868bf215546Sopenharmony_ci psurf->format = tmpl->format; 2869bf215546Sopenharmony_ci psurf->width = tex->width0; 2870bf215546Sopenharmony_ci psurf->height = tex->height0; 2871bf215546Sopenharmony_ci psurf->texture = tex; 2872bf215546Sopenharmony_ci psurf->u.tex.first_layer = tmpl->u.tex.first_layer; 2873bf215546Sopenharmony_ci psurf->u.tex.last_layer = tmpl->u.tex.last_layer; 2874bf215546Sopenharmony_ci psurf->u.tex.level = tmpl->u.tex.level; 2875bf215546Sopenharmony_ci 2876bf215546Sopenharmony_ci uint32_t array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1; 2877bf215546Sopenharmony_ci 2878bf215546Sopenharmony_ci struct isl_view *view = &surf->view; 2879bf215546Sopenharmony_ci *view = (struct isl_view) { 2880bf215546Sopenharmony_ci .format = fmt.fmt, 2881bf215546Sopenharmony_ci .base_level = tmpl->u.tex.level, 2882bf215546Sopenharmony_ci .levels = 1, 2883bf215546Sopenharmony_ci .base_array_layer = tmpl->u.tex.first_layer, 2884bf215546Sopenharmony_ci .array_len = array_len, 2885bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 2886bf215546Sopenharmony_ci .usage = usage, 2887bf215546Sopenharmony_ci }; 2888bf215546Sopenharmony_ci 2889bf215546Sopenharmony_ci#if GFX_VER >= 6 2890bf215546Sopenharmony_ci struct isl_view *read_view = 
&surf->read_view; 2891bf215546Sopenharmony_ci *read_view = (struct isl_view) { 2892bf215546Sopenharmony_ci .format = fmt.fmt, 2893bf215546Sopenharmony_ci .base_level = tmpl->u.tex.level, 2894bf215546Sopenharmony_ci .levels = 1, 2895bf215546Sopenharmony_ci .base_array_layer = tmpl->u.tex.first_layer, 2896bf215546Sopenharmony_ci .array_len = array_len, 2897bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 2898bf215546Sopenharmony_ci .usage = ISL_SURF_USAGE_TEXTURE_BIT, 2899bf215546Sopenharmony_ci }; 2900bf215546Sopenharmony_ci#endif 2901bf215546Sopenharmony_ci 2902bf215546Sopenharmony_ci surf->clear_color = res->aux.clear_color; 2903bf215546Sopenharmony_ci 2904bf215546Sopenharmony_ci /* Bail early for depth/stencil - we don't want SURFACE_STATE for them. */ 2905bf215546Sopenharmony_ci if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT | 2906bf215546Sopenharmony_ci ISL_SURF_USAGE_STENCIL_BIT)) 2907bf215546Sopenharmony_ci return psurf; 2908bf215546Sopenharmony_ci 2909bf215546Sopenharmony_ci if (!isl_format_is_compressed(res->surf.format)) { 2910bf215546Sopenharmony_ci if (crocus_resource_unfinished_aux_import(res)) 2911bf215546Sopenharmony_ci crocus_resource_finish_aux_import(&screen->base, res); 2912bf215546Sopenharmony_ci 2913bf215546Sopenharmony_ci memcpy(&surf->surf, &res->surf, sizeof(surf->surf)); 2914bf215546Sopenharmony_ci uint64_t temp_offset; 2915bf215546Sopenharmony_ci uint32_t temp_x, temp_y; 2916bf215546Sopenharmony_ci 2917bf215546Sopenharmony_ci isl_surf_get_image_offset_B_tile_sa(&res->surf, tmpl->u.tex.level, 2918bf215546Sopenharmony_ci res->base.b.target == PIPE_TEXTURE_3D ? 0 : tmpl->u.tex.first_layer, 2919bf215546Sopenharmony_ci res->base.b.target == PIPE_TEXTURE_3D ? 
tmpl->u.tex.first_layer : 0, 2920bf215546Sopenharmony_ci &temp_offset, &temp_x, &temp_y); 2921bf215546Sopenharmony_ci if (!devinfo->has_surface_tile_offset && 2922bf215546Sopenharmony_ci (temp_x || temp_y)) { 2923bf215546Sopenharmony_ci /* Original gfx4 hardware couldn't draw to a non-tile-aligned 2924bf215546Sopenharmony_ci * destination. 2925bf215546Sopenharmony_ci */ 2926bf215546Sopenharmony_ci /* move to temp */ 2927bf215546Sopenharmony_ci struct pipe_resource wa_templ = (struct pipe_resource) { 2928bf215546Sopenharmony_ci .width0 = u_minify(res->base.b.width0, tmpl->u.tex.level), 2929bf215546Sopenharmony_ci .height0 = u_minify(res->base.b.height0, tmpl->u.tex.level), 2930bf215546Sopenharmony_ci .depth0 = 1, 2931bf215546Sopenharmony_ci .array_size = 1, 2932bf215546Sopenharmony_ci .format = res->base.b.format, 2933bf215546Sopenharmony_ci .target = PIPE_TEXTURE_2D, 2934bf215546Sopenharmony_ci .bind = (usage & ISL_SURF_USAGE_DEPTH_BIT ? PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET) | PIPE_BIND_SAMPLER_VIEW, 2935bf215546Sopenharmony_ci }; 2936bf215546Sopenharmony_ci surf->align_res = screen->base.resource_create(&screen->base, &wa_templ); 2937bf215546Sopenharmony_ci view->base_level = 0; 2938bf215546Sopenharmony_ci view->base_array_layer = 0; 2939bf215546Sopenharmony_ci view->array_len = 1; 2940bf215546Sopenharmony_ci struct crocus_resource *align_res = (struct crocus_resource *)surf->align_res; 2941bf215546Sopenharmony_ci memcpy(&surf->surf, &align_res->surf, sizeof(surf->surf)); 2942bf215546Sopenharmony_ci } 2943bf215546Sopenharmony_ci return psurf; 2944bf215546Sopenharmony_ci } 2945bf215546Sopenharmony_ci 2946bf215546Sopenharmony_ci /* The resource has a compressed format, which is not renderable, but we 2947bf215546Sopenharmony_ci * have a renderable view format. We must be attempting to upload blocks 2948bf215546Sopenharmony_ci * of compressed data via an uncompressed view. 
2949bf215546Sopenharmony_ci * 2950bf215546Sopenharmony_ci * In this case, we can assume there are no auxiliary buffers, a single 2951bf215546Sopenharmony_ci * miplevel, and that the resource is single-sampled. Gallium may try 2952bf215546Sopenharmony_ci * and create an uncompressed view with multiple layers, however. 2953bf215546Sopenharmony_ci */ 2954bf215546Sopenharmony_ci assert(!isl_format_is_compressed(fmt.fmt)); 2955bf215546Sopenharmony_ci assert(res->surf.samples == 1); 2956bf215546Sopenharmony_ci assert(view->levels == 1); 2957bf215546Sopenharmony_ci 2958bf215546Sopenharmony_ci /* TODO: compressed pbo uploads aren't working here */ 2959bf215546Sopenharmony_ci return NULL; 2960bf215546Sopenharmony_ci 2961bf215546Sopenharmony_ci uint64_t offset_B = 0; 2962bf215546Sopenharmony_ci uint32_t tile_x_sa = 0, tile_y_sa = 0; 2963bf215546Sopenharmony_ci 2964bf215546Sopenharmony_ci if (view->base_level > 0) { 2965bf215546Sopenharmony_ci /* We can't rely on the hardware's miplevel selection with such 2966bf215546Sopenharmony_ci * a substantial lie about the format, so we select a single image 2967bf215546Sopenharmony_ci * using the Tile X/Y Offset fields. In this case, we can't handle 2968bf215546Sopenharmony_ci * multiple array slices. 2969bf215546Sopenharmony_ci * 2970bf215546Sopenharmony_ci * On Broadwell, HALIGN and VALIGN are specified in pixels and are 2971bf215546Sopenharmony_ci * hard-coded to align to exactly the block size of the compressed 2972bf215546Sopenharmony_ci * texture. This means that, when reinterpreted as a non-compressed 2973bf215546Sopenharmony_ci * texture, the tile offsets may be anything and we can't rely on 2974bf215546Sopenharmony_ci * X/Y Offset. 2975bf215546Sopenharmony_ci * 2976bf215546Sopenharmony_ci * Return NULL to force the state tracker to take fallback paths. 
2977bf215546Sopenharmony_ci */ 2978bf215546Sopenharmony_ci // TODO: check if the gen7 check is right, originally gen8 2979bf215546Sopenharmony_ci if (view->array_len > 1 || GFX_VER == 7) 2980bf215546Sopenharmony_ci return NULL; 2981bf215546Sopenharmony_ci 2982bf215546Sopenharmony_ci const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D; 2983bf215546Sopenharmony_ci isl_surf_get_image_surf(&screen->isl_dev, &res->surf, 2984bf215546Sopenharmony_ci view->base_level, 2985bf215546Sopenharmony_ci is_3d ? 0 : view->base_array_layer, 2986bf215546Sopenharmony_ci is_3d ? view->base_array_layer : 0, 2987bf215546Sopenharmony_ci &surf->surf, 2988bf215546Sopenharmony_ci &offset_B, &tile_x_sa, &tile_y_sa); 2989bf215546Sopenharmony_ci 2990bf215546Sopenharmony_ci /* We use address and tile offsets to access a single level/layer 2991bf215546Sopenharmony_ci * as a subimage, so reset level/layer so it doesn't offset again. 2992bf215546Sopenharmony_ci */ 2993bf215546Sopenharmony_ci view->base_array_layer = 0; 2994bf215546Sopenharmony_ci view->base_level = 0; 2995bf215546Sopenharmony_ci } else { 2996bf215546Sopenharmony_ci /* Level 0 doesn't require tile offsets, and the hardware can find 2997bf215546Sopenharmony_ci * array slices using QPitch even with the format override, so we 2998bf215546Sopenharmony_ci * can allow layers in this case. Copy the original ISL surface. 2999bf215546Sopenharmony_ci */ 3000bf215546Sopenharmony_ci memcpy(&surf->surf, &res->surf, sizeof(surf->surf)); 3001bf215546Sopenharmony_ci } 3002bf215546Sopenharmony_ci 3003bf215546Sopenharmony_ci /* Scale down the image dimensions by the block size. 
*/ 3004bf215546Sopenharmony_ci const struct isl_format_layout *fmtl = 3005bf215546Sopenharmony_ci isl_format_get_layout(res->surf.format); 3006bf215546Sopenharmony_ci surf->surf.format = fmt.fmt; 3007bf215546Sopenharmony_ci surf->surf.logical_level0_px = isl_surf_get_logical_level0_el(&surf->surf); 3008bf215546Sopenharmony_ci surf->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&surf->surf); 3009bf215546Sopenharmony_ci tile_x_sa /= fmtl->bw; 3010bf215546Sopenharmony_ci tile_y_sa /= fmtl->bh; 3011bf215546Sopenharmony_ci 3012bf215546Sopenharmony_ci psurf->width = surf->surf.logical_level0_px.width; 3013bf215546Sopenharmony_ci psurf->height = surf->surf.logical_level0_px.height; 3014bf215546Sopenharmony_ci 3015bf215546Sopenharmony_ci return psurf; 3016bf215546Sopenharmony_ci} 3017bf215546Sopenharmony_ci 3018bf215546Sopenharmony_ci#if GFX_VER >= 7 3019bf215546Sopenharmony_cistatic void 3020bf215546Sopenharmony_cifill_default_image_param(struct brw_image_param *param) 3021bf215546Sopenharmony_ci{ 3022bf215546Sopenharmony_ci memset(param, 0, sizeof(*param)); 3023bf215546Sopenharmony_ci /* Set the swizzling shifts to all-ones to effectively disable swizzling -- 3024bf215546Sopenharmony_ci * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more 3025bf215546Sopenharmony_ci * detailed explanation of these parameters. 
3026bf215546Sopenharmony_ci */ 3027bf215546Sopenharmony_ci param->swizzling[0] = 0xff; 3028bf215546Sopenharmony_ci param->swizzling[1] = 0xff; 3029bf215546Sopenharmony_ci} 3030bf215546Sopenharmony_ci 3031bf215546Sopenharmony_cistatic void 3032bf215546Sopenharmony_cifill_buffer_image_param(struct brw_image_param *param, 3033bf215546Sopenharmony_ci enum pipe_format pfmt, 3034bf215546Sopenharmony_ci unsigned size) 3035bf215546Sopenharmony_ci{ 3036bf215546Sopenharmony_ci const unsigned cpp = util_format_get_blocksize(pfmt); 3037bf215546Sopenharmony_ci 3038bf215546Sopenharmony_ci fill_default_image_param(param); 3039bf215546Sopenharmony_ci param->size[0] = size / cpp; 3040bf215546Sopenharmony_ci param->stride[0] = cpp; 3041bf215546Sopenharmony_ci} 3042bf215546Sopenharmony_ci 3043bf215546Sopenharmony_ci#endif 3044bf215546Sopenharmony_ci 3045bf215546Sopenharmony_ci/** 3046bf215546Sopenharmony_ci * The pipe->set_shader_images() driver hook. 3047bf215546Sopenharmony_ci */ 3048bf215546Sopenharmony_cistatic void 3049bf215546Sopenharmony_cicrocus_set_shader_images(struct pipe_context *ctx, 3050bf215546Sopenharmony_ci enum pipe_shader_type p_stage, 3051bf215546Sopenharmony_ci unsigned start_slot, unsigned count, 3052bf215546Sopenharmony_ci unsigned unbind_num_trailing_slots, 3053bf215546Sopenharmony_ci const struct pipe_image_view *p_images) 3054bf215546Sopenharmony_ci{ 3055bf215546Sopenharmony_ci#if GFX_VER >= 7 3056bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3057bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 3058bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &screen->devinfo; 3059bf215546Sopenharmony_ci gl_shader_stage stage = stage_from_pipe(p_stage); 3060bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 3061bf215546Sopenharmony_ci struct crocus_genx_state *genx = ice->state.genx; 3062bf215546Sopenharmony_ci struct brw_image_param 
*image_params = genx->shaders[stage].image_param; 3063bf215546Sopenharmony_ci 3064bf215546Sopenharmony_ci shs->bound_image_views &= ~u_bit_consecutive(start_slot, count); 3065bf215546Sopenharmony_ci 3066bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) { 3067bf215546Sopenharmony_ci struct crocus_image_view *iv = &shs->image[start_slot + i]; 3068bf215546Sopenharmony_ci 3069bf215546Sopenharmony_ci if (p_images && p_images[i].resource) { 3070bf215546Sopenharmony_ci const struct pipe_image_view *img = &p_images[i]; 3071bf215546Sopenharmony_ci struct crocus_resource *res = (void *) img->resource; 3072bf215546Sopenharmony_ci 3073bf215546Sopenharmony_ci util_copy_image_view(&iv->base, img); 3074bf215546Sopenharmony_ci 3075bf215546Sopenharmony_ci shs->bound_image_views |= 1 << (start_slot + i); 3076bf215546Sopenharmony_ci 3077bf215546Sopenharmony_ci res->bind_history |= PIPE_BIND_SHADER_IMAGE; 3078bf215546Sopenharmony_ci res->bind_stages |= 1 << stage; 3079bf215546Sopenharmony_ci 3080bf215546Sopenharmony_ci isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT; 3081bf215546Sopenharmony_ci struct crocus_format_info fmt = 3082bf215546Sopenharmony_ci crocus_format_for_usage(devinfo, img->format, usage); 3083bf215546Sopenharmony_ci 3084bf215546Sopenharmony_ci struct isl_swizzle swiz = pipe_to_isl_swizzles(fmt.swizzles); 3085bf215546Sopenharmony_ci if (img->shader_access & PIPE_IMAGE_ACCESS_READ) { 3086bf215546Sopenharmony_ci /* On Gen8, try to use typed surfaces reads (which support a 3087bf215546Sopenharmony_ci * limited number of formats), and if not possible, fall back 3088bf215546Sopenharmony_ci * to untyped reads. 
3089bf215546Sopenharmony_ci */ 3090bf215546Sopenharmony_ci if (!isl_has_matching_typed_storage_image_format(devinfo, fmt.fmt)) 3091bf215546Sopenharmony_ci fmt.fmt = ISL_FORMAT_RAW; 3092bf215546Sopenharmony_ci else 3093bf215546Sopenharmony_ci fmt.fmt = isl_lower_storage_image_format(devinfo, fmt.fmt); 3094bf215546Sopenharmony_ci } 3095bf215546Sopenharmony_ci 3096bf215546Sopenharmony_ci if (res->base.b.target != PIPE_BUFFER) { 3097bf215546Sopenharmony_ci struct isl_view view = { 3098bf215546Sopenharmony_ci .format = fmt.fmt, 3099bf215546Sopenharmony_ci .base_level = img->u.tex.level, 3100bf215546Sopenharmony_ci .levels = 1, 3101bf215546Sopenharmony_ci .base_array_layer = img->u.tex.first_layer, 3102bf215546Sopenharmony_ci .array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1, 3103bf215546Sopenharmony_ci .swizzle = swiz, 3104bf215546Sopenharmony_ci .usage = usage, 3105bf215546Sopenharmony_ci }; 3106bf215546Sopenharmony_ci 3107bf215546Sopenharmony_ci iv->view = view; 3108bf215546Sopenharmony_ci 3109bf215546Sopenharmony_ci isl_surf_fill_image_param(&screen->isl_dev, 3110bf215546Sopenharmony_ci &image_params[start_slot + i], 3111bf215546Sopenharmony_ci &res->surf, &view); 3112bf215546Sopenharmony_ci } else { 3113bf215546Sopenharmony_ci struct isl_view view = { 3114bf215546Sopenharmony_ci .format = fmt.fmt, 3115bf215546Sopenharmony_ci .swizzle = swiz, 3116bf215546Sopenharmony_ci .usage = usage, 3117bf215546Sopenharmony_ci }; 3118bf215546Sopenharmony_ci iv->view = view; 3119bf215546Sopenharmony_ci 3120bf215546Sopenharmony_ci util_range_add(&res->base.b, &res->valid_buffer_range, img->u.buf.offset, 3121bf215546Sopenharmony_ci img->u.buf.offset + img->u.buf.size); 3122bf215546Sopenharmony_ci fill_buffer_image_param(&image_params[start_slot + i], 3123bf215546Sopenharmony_ci img->format, img->u.buf.size); 3124bf215546Sopenharmony_ci } 3125bf215546Sopenharmony_ci } else { 3126bf215546Sopenharmony_ci pipe_resource_reference(&iv->base.resource, NULL); 
3127bf215546Sopenharmony_ci fill_default_image_param(&image_params[start_slot + i]); 3128bf215546Sopenharmony_ci } 3129bf215546Sopenharmony_ci } 3130bf215546Sopenharmony_ci 3131bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage; 3132bf215546Sopenharmony_ci ice->state.dirty |= 3133bf215546Sopenharmony_ci stage == MESA_SHADER_COMPUTE ? CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES 3134bf215546Sopenharmony_ci : CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 3135bf215546Sopenharmony_ci 3136bf215546Sopenharmony_ci /* Broadwell also needs brw_image_params re-uploaded */ 3137bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage; 3138bf215546Sopenharmony_ci shs->sysvals_need_upload = true; 3139bf215546Sopenharmony_ci#endif 3140bf215546Sopenharmony_ci} 3141bf215546Sopenharmony_ci 3142bf215546Sopenharmony_ci 3143bf215546Sopenharmony_ci/** 3144bf215546Sopenharmony_ci * The pipe->set_sampler_views() driver hook. 3145bf215546Sopenharmony_ci */ 3146bf215546Sopenharmony_cistatic void 3147bf215546Sopenharmony_cicrocus_set_sampler_views(struct pipe_context *ctx, 3148bf215546Sopenharmony_ci enum pipe_shader_type p_stage, 3149bf215546Sopenharmony_ci unsigned start, unsigned count, 3150bf215546Sopenharmony_ci unsigned unbind_num_trailing_slots, 3151bf215546Sopenharmony_ci bool take_ownership, 3152bf215546Sopenharmony_ci struct pipe_sampler_view **views) 3153bf215546Sopenharmony_ci{ 3154bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3155bf215546Sopenharmony_ci gl_shader_stage stage = stage_from_pipe(p_stage); 3156bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 3157bf215546Sopenharmony_ci 3158bf215546Sopenharmony_ci shs->bound_sampler_views &= ~u_bit_consecutive(start, count); 3159bf215546Sopenharmony_ci 3160bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) { 3161bf215546Sopenharmony_ci struct pipe_sampler_view *pview = views ? 
views[i] : NULL; 3162bf215546Sopenharmony_ci 3163bf215546Sopenharmony_ci if (take_ownership) { 3164bf215546Sopenharmony_ci pipe_sampler_view_reference((struct pipe_sampler_view **) 3165bf215546Sopenharmony_ci &shs->textures[start + i], NULL); 3166bf215546Sopenharmony_ci shs->textures[start + i] = (struct crocus_sampler_view *)pview; 3167bf215546Sopenharmony_ci } else { 3168bf215546Sopenharmony_ci pipe_sampler_view_reference((struct pipe_sampler_view **) 3169bf215546Sopenharmony_ci &shs->textures[start + i], pview); 3170bf215546Sopenharmony_ci } 3171bf215546Sopenharmony_ci 3172bf215546Sopenharmony_ci struct crocus_sampler_view *view = (void *) pview; 3173bf215546Sopenharmony_ci if (view) { 3174bf215546Sopenharmony_ci view->res->bind_history |= PIPE_BIND_SAMPLER_VIEW; 3175bf215546Sopenharmony_ci view->res->bind_stages |= 1 << stage; 3176bf215546Sopenharmony_ci 3177bf215546Sopenharmony_ci shs->bound_sampler_views |= 1 << (start + i); 3178bf215546Sopenharmony_ci } 3179bf215546Sopenharmony_ci } 3180bf215546Sopenharmony_ci#if GFX_VER == 6 3181bf215546Sopenharmony_ci /* first level parameters to crocus_upload_sampler_state is gfx6 only */ 3182bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage; 3183bf215546Sopenharmony_ci#endif 3184bf215546Sopenharmony_ci ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage); 3185bf215546Sopenharmony_ci ice->state.dirty |= 3186bf215546Sopenharmony_ci stage == MESA_SHADER_COMPUTE ? CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES 3187bf215546Sopenharmony_ci : CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 3188bf215546Sopenharmony_ci ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES]; 3189bf215546Sopenharmony_ci} 3190bf215546Sopenharmony_ci 3191bf215546Sopenharmony_ci/** 3192bf215546Sopenharmony_ci * The pipe->set_tess_state() driver hook. 
3193bf215546Sopenharmony_ci */ 3194bf215546Sopenharmony_cistatic void 3195bf215546Sopenharmony_cicrocus_set_tess_state(struct pipe_context *ctx, 3196bf215546Sopenharmony_ci const float default_outer_level[4], 3197bf215546Sopenharmony_ci const float default_inner_level[2]) 3198bf215546Sopenharmony_ci{ 3199bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3200bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL]; 3201bf215546Sopenharmony_ci 3202bf215546Sopenharmony_ci memcpy(&ice->state.default_outer_level[0], &default_outer_level[0], 4 * sizeof(float)); 3203bf215546Sopenharmony_ci memcpy(&ice->state.default_inner_level[0], &default_inner_level[0], 2 * sizeof(float)); 3204bf215546Sopenharmony_ci 3205bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS; 3206bf215546Sopenharmony_ci shs->sysvals_need_upload = true; 3207bf215546Sopenharmony_ci} 3208bf215546Sopenharmony_ci 3209bf215546Sopenharmony_cistatic void 3210bf215546Sopenharmony_cicrocus_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices) 3211bf215546Sopenharmony_ci{ 3212bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3213bf215546Sopenharmony_ci 3214bf215546Sopenharmony_ci ice->state.patch_vertices = patch_vertices; 3215bf215546Sopenharmony_ci} 3216bf215546Sopenharmony_ci 3217bf215546Sopenharmony_cistatic void 3218bf215546Sopenharmony_cicrocus_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf) 3219bf215546Sopenharmony_ci{ 3220bf215546Sopenharmony_ci struct crocus_surface *surf = (void *) p_surf; 3221bf215546Sopenharmony_ci pipe_resource_reference(&p_surf->texture, NULL); 3222bf215546Sopenharmony_ci 3223bf215546Sopenharmony_ci pipe_resource_reference(&surf->align_res, NULL); 3224bf215546Sopenharmony_ci free(surf); 3225bf215546Sopenharmony_ci} 3226bf215546Sopenharmony_ci 3227bf215546Sopenharmony_cistatic void 
3228bf215546Sopenharmony_cicrocus_set_clip_state(struct pipe_context *ctx, 3229bf215546Sopenharmony_ci const struct pipe_clip_state *state) 3230bf215546Sopenharmony_ci{ 3231bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3232bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX]; 3233bf215546Sopenharmony_ci struct crocus_shader_state *gshs = &ice->state.shaders[MESA_SHADER_GEOMETRY]; 3234bf215546Sopenharmony_ci struct crocus_shader_state *tshs = &ice->state.shaders[MESA_SHADER_TESS_EVAL]; 3235bf215546Sopenharmony_ci 3236bf215546Sopenharmony_ci memcpy(&ice->state.clip_planes, state, sizeof(*state)); 3237bf215546Sopenharmony_ci 3238bf215546Sopenharmony_ci#if GFX_VER <= 5 3239bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE; 3240bf215546Sopenharmony_ci#endif 3241bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS | CROCUS_STAGE_DIRTY_CONSTANTS_GS | 3242bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CONSTANTS_TES; 3243bf215546Sopenharmony_ci shs->sysvals_need_upload = true; 3244bf215546Sopenharmony_ci gshs->sysvals_need_upload = true; 3245bf215546Sopenharmony_ci tshs->sysvals_need_upload = true; 3246bf215546Sopenharmony_ci} 3247bf215546Sopenharmony_ci 3248bf215546Sopenharmony_ci/** 3249bf215546Sopenharmony_ci * The pipe->set_polygon_stipple() driver hook. 
3250bf215546Sopenharmony_ci */ 3251bf215546Sopenharmony_cistatic void 3252bf215546Sopenharmony_cicrocus_set_polygon_stipple(struct pipe_context *ctx, 3253bf215546Sopenharmony_ci const struct pipe_poly_stipple *state) 3254bf215546Sopenharmony_ci{ 3255bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3256bf215546Sopenharmony_ci memcpy(&ice->state.poly_stipple, state, sizeof(*state)); 3257bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_POLYGON_STIPPLE; 3258bf215546Sopenharmony_ci} 3259bf215546Sopenharmony_ci 3260bf215546Sopenharmony_ci/** 3261bf215546Sopenharmony_ci * The pipe->set_sample_mask() driver hook. 3262bf215546Sopenharmony_ci */ 3263bf215546Sopenharmony_cistatic void 3264bf215546Sopenharmony_cicrocus_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 3265bf215546Sopenharmony_ci{ 3266bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3267bf215546Sopenharmony_ci 3268bf215546Sopenharmony_ci /* We only support 16x MSAA, so we have 16 bits of sample maks. 3269bf215546Sopenharmony_ci * st/mesa may pass us 0xffffffff though, meaning "enable all samples". 
3270bf215546Sopenharmony_ci */ 3271bf215546Sopenharmony_ci ice->state.sample_mask = sample_mask & 0xff; 3272bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK; 3273bf215546Sopenharmony_ci} 3274bf215546Sopenharmony_ci 3275bf215546Sopenharmony_cistatic void 3276bf215546Sopenharmony_cicrocus_fill_scissor_rect(struct crocus_context *ice, 3277bf215546Sopenharmony_ci int idx, 3278bf215546Sopenharmony_ci struct pipe_scissor_state *ss) 3279bf215546Sopenharmony_ci{ 3280bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 3281bf215546Sopenharmony_ci struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso; 3282bf215546Sopenharmony_ci const struct pipe_viewport_state *vp = &ice->state.viewports[idx]; 3283bf215546Sopenharmony_ci struct pipe_scissor_state scissor = (struct pipe_scissor_state) { 3284bf215546Sopenharmony_ci .minx = MAX2(-fabsf(vp->scale[0]) + vp->translate[0], 0), 3285bf215546Sopenharmony_ci .maxx = MIN2( fabsf(vp->scale[0]) + vp->translate[0], cso_fb->width) - 1, 3286bf215546Sopenharmony_ci .miny = MAX2(-fabsf(vp->scale[1]) + vp->translate[1], 0), 3287bf215546Sopenharmony_ci .maxy = MIN2( fabsf(vp->scale[1]) + vp->translate[1], cso_fb->height) - 1, 3288bf215546Sopenharmony_ci }; 3289bf215546Sopenharmony_ci if (cso_state->scissor) { 3290bf215546Sopenharmony_ci struct pipe_scissor_state *s = &ice->state.scissors[idx]; 3291bf215546Sopenharmony_ci scissor.minx = MAX2(scissor.minx, s->minx); 3292bf215546Sopenharmony_ci scissor.miny = MAX2(scissor.miny, s->miny); 3293bf215546Sopenharmony_ci scissor.maxx = MIN2(scissor.maxx, s->maxx); 3294bf215546Sopenharmony_ci scissor.maxy = MIN2(scissor.maxy, s->maxy); 3295bf215546Sopenharmony_ci } 3296bf215546Sopenharmony_ci *ss = scissor; 3297bf215546Sopenharmony_ci} 3298bf215546Sopenharmony_ci 3299bf215546Sopenharmony_ci/** 3300bf215546Sopenharmony_ci * The pipe->set_scissor_states() driver hook. 
3301bf215546Sopenharmony_ci * 3302bf215546Sopenharmony_ci * This corresponds to our SCISSOR_RECT state structures. It's an 3303bf215546Sopenharmony_ci * exact match, so we just store them, and memcpy them out later. 3304bf215546Sopenharmony_ci */ 3305bf215546Sopenharmony_cistatic void 3306bf215546Sopenharmony_cicrocus_set_scissor_states(struct pipe_context *ctx, 3307bf215546Sopenharmony_ci unsigned start_slot, 3308bf215546Sopenharmony_ci unsigned num_scissors, 3309bf215546Sopenharmony_ci const struct pipe_scissor_state *rects) 3310bf215546Sopenharmony_ci{ 3311bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3312bf215546Sopenharmony_ci 3313bf215546Sopenharmony_ci for (unsigned i = 0; i < num_scissors; i++) { 3314bf215546Sopenharmony_ci if (rects[i].minx == rects[i].maxx || rects[i].miny == rects[i].maxy) { 3315bf215546Sopenharmony_ci /* If the scissor was out of bounds and got clamped to 0 width/height 3316bf215546Sopenharmony_ci * at the bounds, the subtraction of 1 from maximums could produce a 3317bf215546Sopenharmony_ci * negative number and thus not clip anything. Instead, just provide 3318bf215546Sopenharmony_ci * a min > max scissor inside the bounds, which produces the expected 3319bf215546Sopenharmony_ci * no rendering. 
3320bf215546Sopenharmony_ci */ 3321bf215546Sopenharmony_ci ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) { 3322bf215546Sopenharmony_ci .minx = 1, .maxx = 0, .miny = 1, .maxy = 0, 3323bf215546Sopenharmony_ci }; 3324bf215546Sopenharmony_ci } else { 3325bf215546Sopenharmony_ci ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) { 3326bf215546Sopenharmony_ci .minx = rects[i].minx, .miny = rects[i].miny, 3327bf215546Sopenharmony_ci .maxx = rects[i].maxx - 1, .maxy = rects[i].maxy - 1, 3328bf215546Sopenharmony_ci }; 3329bf215546Sopenharmony_ci } 3330bf215546Sopenharmony_ci } 3331bf215546Sopenharmony_ci 3332bf215546Sopenharmony_ci#if GFX_VER < 6 3333bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RASTER; /* SF state */ 3334bf215546Sopenharmony_ci#else 3335bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT; 3336bf215546Sopenharmony_ci#endif 3337bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT; 3338bf215546Sopenharmony_ci 3339bf215546Sopenharmony_ci} 3340bf215546Sopenharmony_ci 3341bf215546Sopenharmony_ci/** 3342bf215546Sopenharmony_ci * The pipe->set_stencil_ref() driver hook. 3343bf215546Sopenharmony_ci * 3344bf215546Sopenharmony_ci * This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time. 
3345bf215546Sopenharmony_ci */ 3346bf215546Sopenharmony_cistatic void 3347bf215546Sopenharmony_cicrocus_set_stencil_ref(struct pipe_context *ctx, 3348bf215546Sopenharmony_ci const struct pipe_stencil_ref ref) 3349bf215546Sopenharmony_ci{ 3350bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3351bf215546Sopenharmony_ci ice->state.stencil_ref = ref; 3352bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE; 3353bf215546Sopenharmony_ci} 3354bf215546Sopenharmony_ci 3355bf215546Sopenharmony_ci#if GFX_VER == 8 3356bf215546Sopenharmony_cistatic float 3357bf215546Sopenharmony_civiewport_extent(const struct pipe_viewport_state *state, int axis, float sign) 3358bf215546Sopenharmony_ci{ 3359bf215546Sopenharmony_ci return copysignf(state->scale[axis], sign) + state->translate[axis]; 3360bf215546Sopenharmony_ci} 3361bf215546Sopenharmony_ci#endif 3362bf215546Sopenharmony_ci 3363bf215546Sopenharmony_ci/** 3364bf215546Sopenharmony_ci * The pipe->set_viewport_states() driver hook. 3365bf215546Sopenharmony_ci * 3366bf215546Sopenharmony_ci * This corresponds to our SF_CLIP_VIEWPORT states. We can't calculate 3367bf215546Sopenharmony_ci * the guardband yet, as we need the framebuffer dimensions, but we can 3368bf215546Sopenharmony_ci * at least fill out the rest. 
3369bf215546Sopenharmony_ci */ 3370bf215546Sopenharmony_cistatic void 3371bf215546Sopenharmony_cicrocus_set_viewport_states(struct pipe_context *ctx, 3372bf215546Sopenharmony_ci unsigned start_slot, 3373bf215546Sopenharmony_ci unsigned count, 3374bf215546Sopenharmony_ci const struct pipe_viewport_state *states) 3375bf215546Sopenharmony_ci{ 3376bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3377bf215546Sopenharmony_ci 3378bf215546Sopenharmony_ci memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count); 3379bf215546Sopenharmony_ci 3380bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT; 3381bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RASTER; 3382bf215546Sopenharmony_ci#if GFX_VER >= 6 3383bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT; 3384bf215546Sopenharmony_ci#endif 3385bf215546Sopenharmony_ci 3386bf215546Sopenharmony_ci if (ice->state.cso_rast && (!ice->state.cso_rast->cso.depth_clip_near || 3387bf215546Sopenharmony_ci !ice->state.cso_rast->cso.depth_clip_far)) 3388bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT; 3389bf215546Sopenharmony_ci} 3390bf215546Sopenharmony_ci 3391bf215546Sopenharmony_ci/** 3392bf215546Sopenharmony_ci * The pipe->set_framebuffer_state() driver hook. 3393bf215546Sopenharmony_ci * 3394bf215546Sopenharmony_ci * Sets the current draw FBO, including color render targets, depth, 3395bf215546Sopenharmony_ci * and stencil buffers. 
3396bf215546Sopenharmony_ci */ 3397bf215546Sopenharmony_cistatic void 3398bf215546Sopenharmony_cicrocus_set_framebuffer_state(struct pipe_context *ctx, 3399bf215546Sopenharmony_ci const struct pipe_framebuffer_state *state) 3400bf215546Sopenharmony_ci{ 3401bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3402bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso = &ice->state.framebuffer; 3403bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 3404bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &screen->devinfo; 3405bf215546Sopenharmony_ci#if 0 3406bf215546Sopenharmony_ci struct isl_device *isl_dev = &screen->isl_dev; 3407bf215546Sopenharmony_ci struct crocus_resource *zres; 3408bf215546Sopenharmony_ci struct crocus_resource *stencil_res; 3409bf215546Sopenharmony_ci#endif 3410bf215546Sopenharmony_ci 3411bf215546Sopenharmony_ci unsigned samples = util_framebuffer_get_num_samples(state); 3412bf215546Sopenharmony_ci unsigned layers = util_framebuffer_get_num_layers(state); 3413bf215546Sopenharmony_ci 3414bf215546Sopenharmony_ci#if GFX_VER >= 6 3415bf215546Sopenharmony_ci if (cso->samples != samples) { 3416bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE; 3417bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK; 3418bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RASTER; 3419bf215546Sopenharmony_ci#if GFX_VERx10 == 75 3420bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS; 3421bf215546Sopenharmony_ci#endif 3422bf215546Sopenharmony_ci } 3423bf215546Sopenharmony_ci#endif 3424bf215546Sopenharmony_ci 3425bf215546Sopenharmony_ci#if GFX_VER >= 6 && GFX_VER < 8 3426bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE; 3427bf215546Sopenharmony_ci#endif 3428bf215546Sopenharmony_ci 3429bf215546Sopenharmony_ci if ((cso->layers == 0) != (layers == 0)) { 3430bf215546Sopenharmony_ci 
ice->state.dirty |= CROCUS_DIRTY_CLIP; 3431bf215546Sopenharmony_ci } 3432bf215546Sopenharmony_ci 3433bf215546Sopenharmony_ci if (cso->width != state->width || cso->height != state->height) { 3434bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT; 3435bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RASTER; 3436bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_DRAWING_RECTANGLE; 3437bf215546Sopenharmony_ci#if GFX_VER >= 6 3438bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT; 3439bf215546Sopenharmony_ci#endif 3440bf215546Sopenharmony_ci } 3441bf215546Sopenharmony_ci 3442bf215546Sopenharmony_ci if (cso->zsbuf || state->zsbuf) { 3443bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER; 3444bf215546Sopenharmony_ci 3445bf215546Sopenharmony_ci /* update SF's depth buffer format */ 3446bf215546Sopenharmony_ci if (GFX_VER == 7 && cso->zsbuf) 3447bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RASTER; 3448bf215546Sopenharmony_ci } 3449bf215546Sopenharmony_ci 3450bf215546Sopenharmony_ci /* wm thread dispatch enable */ 3451bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_WM; 3452bf215546Sopenharmony_ci util_copy_framebuffer_state(cso, state); 3453bf215546Sopenharmony_ci cso->samples = samples; 3454bf215546Sopenharmony_ci cso->layers = layers; 3455bf215546Sopenharmony_ci 3456bf215546Sopenharmony_ci if (cso->zsbuf) { 3457bf215546Sopenharmony_ci struct crocus_resource *zres; 3458bf215546Sopenharmony_ci struct crocus_resource *stencil_res; 3459bf215546Sopenharmony_ci enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE; 3460bf215546Sopenharmony_ci crocus_get_depth_stencil_resources(devinfo, cso->zsbuf->texture, &zres, 3461bf215546Sopenharmony_ci &stencil_res); 3462bf215546Sopenharmony_ci if (zres && crocus_resource_level_has_hiz(zres, cso->zsbuf->u.tex.level)) { 3463bf215546Sopenharmony_ci aux_usage = zres->aux.usage; 3464bf215546Sopenharmony_ci } 3465bf215546Sopenharmony_ci 
ice->state.hiz_usage = aux_usage; 3466bf215546Sopenharmony_ci } 3467bf215546Sopenharmony_ci 3468bf215546Sopenharmony_ci /* Render target change */ 3469bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS; 3470bf215546Sopenharmony_ci 3471bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES; 3472bf215546Sopenharmony_ci 3473bf215546Sopenharmony_ci ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_FRAMEBUFFER]; 3474bf215546Sopenharmony_ci} 3475bf215546Sopenharmony_ci 3476bf215546Sopenharmony_ci/** 3477bf215546Sopenharmony_ci * The pipe->set_constant_buffer() driver hook. 3478bf215546Sopenharmony_ci * 3479bf215546Sopenharmony_ci * This uploads any constant data in user buffers, and references 3480bf215546Sopenharmony_ci * any UBO resources containing constant data. 3481bf215546Sopenharmony_ci */ 3482bf215546Sopenharmony_cistatic void 3483bf215546Sopenharmony_cicrocus_set_constant_buffer(struct pipe_context *ctx, 3484bf215546Sopenharmony_ci enum pipe_shader_type p_stage, unsigned index, 3485bf215546Sopenharmony_ci bool take_ownership, 3486bf215546Sopenharmony_ci const struct pipe_constant_buffer *input) 3487bf215546Sopenharmony_ci{ 3488bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3489bf215546Sopenharmony_ci gl_shader_stage stage = stage_from_pipe(p_stage); 3490bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 3491bf215546Sopenharmony_ci struct pipe_constant_buffer *cbuf = &shs->constbufs[index]; 3492bf215546Sopenharmony_ci 3493bf215546Sopenharmony_ci util_copy_constant_buffer(&shs->constbufs[index], input, take_ownership); 3494bf215546Sopenharmony_ci 3495bf215546Sopenharmony_ci if (input && input->buffer_size && (input->buffer || input->user_buffer)) { 3496bf215546Sopenharmony_ci shs->bound_cbufs |= 1u << index; 3497bf215546Sopenharmony_ci 3498bf215546Sopenharmony_ci if (input->user_buffer) { 3499bf215546Sopenharmony_ci 
void *map = NULL; 3500bf215546Sopenharmony_ci pipe_resource_reference(&cbuf->buffer, NULL); 3501bf215546Sopenharmony_ci u_upload_alloc(ice->ctx.const_uploader, 0, input->buffer_size, 64, 3502bf215546Sopenharmony_ci &cbuf->buffer_offset, &cbuf->buffer, (void **) &map); 3503bf215546Sopenharmony_ci 3504bf215546Sopenharmony_ci if (!cbuf->buffer) { 3505bf215546Sopenharmony_ci /* Allocation was unsuccessful - just unbind */ 3506bf215546Sopenharmony_ci crocus_set_constant_buffer(ctx, p_stage, index, false, NULL); 3507bf215546Sopenharmony_ci return; 3508bf215546Sopenharmony_ci } 3509bf215546Sopenharmony_ci 3510bf215546Sopenharmony_ci assert(map); 3511bf215546Sopenharmony_ci memcpy(map, input->user_buffer, input->buffer_size); 3512bf215546Sopenharmony_ci } 3513bf215546Sopenharmony_ci cbuf->buffer_size = 3514bf215546Sopenharmony_ci MIN2(input->buffer_size, 3515bf215546Sopenharmony_ci crocus_resource_bo(cbuf->buffer)->size - cbuf->buffer_offset); 3516bf215546Sopenharmony_ci 3517bf215546Sopenharmony_ci struct crocus_resource *res = (void *) cbuf->buffer; 3518bf215546Sopenharmony_ci res->bind_history |= PIPE_BIND_CONSTANT_BUFFER; 3519bf215546Sopenharmony_ci res->bind_stages |= 1 << stage; 3520bf215546Sopenharmony_ci } else { 3521bf215546Sopenharmony_ci shs->bound_cbufs &= ~(1u << index); 3522bf215546Sopenharmony_ci } 3523bf215546Sopenharmony_ci 3524bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage; 3525bf215546Sopenharmony_ci} 3526bf215546Sopenharmony_ci 3527bf215546Sopenharmony_cistatic void 3528bf215546Sopenharmony_ciupload_sysvals(struct crocus_context *ice, 3529bf215546Sopenharmony_ci gl_shader_stage stage) 3530bf215546Sopenharmony_ci{ 3531bf215546Sopenharmony_ci UNUSED struct crocus_genx_state *genx = ice->state.genx; 3532bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 3533bf215546Sopenharmony_ci 3534bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[stage]; 
3535bf215546Sopenharmony_ci if (!shader || shader->num_system_values == 0) 3536bf215546Sopenharmony_ci return; 3537bf215546Sopenharmony_ci 3538bf215546Sopenharmony_ci assert(shader->num_cbufs > 0); 3539bf215546Sopenharmony_ci 3540bf215546Sopenharmony_ci unsigned sysval_cbuf_index = shader->num_cbufs - 1; 3541bf215546Sopenharmony_ci struct pipe_constant_buffer *cbuf = &shs->constbufs[sysval_cbuf_index]; 3542bf215546Sopenharmony_ci unsigned upload_size = shader->num_system_values * sizeof(uint32_t); 3543bf215546Sopenharmony_ci uint32_t *map = NULL; 3544bf215546Sopenharmony_ci 3545bf215546Sopenharmony_ci assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS); 3546bf215546Sopenharmony_ci u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64, 3547bf215546Sopenharmony_ci &cbuf->buffer_offset, &cbuf->buffer, (void **) &map); 3548bf215546Sopenharmony_ci 3549bf215546Sopenharmony_ci for (int i = 0; i < shader->num_system_values; i++) { 3550bf215546Sopenharmony_ci uint32_t sysval = shader->system_values[i]; 3551bf215546Sopenharmony_ci uint32_t value = 0; 3552bf215546Sopenharmony_ci 3553bf215546Sopenharmony_ci if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) { 3554bf215546Sopenharmony_ci#if GFX_VER >= 7 3555bf215546Sopenharmony_ci unsigned img = BRW_PARAM_IMAGE_IDX(sysval); 3556bf215546Sopenharmony_ci unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval); 3557bf215546Sopenharmony_ci struct brw_image_param *param = 3558bf215546Sopenharmony_ci &genx->shaders[stage].image_param[img]; 3559bf215546Sopenharmony_ci 3560bf215546Sopenharmony_ci assert(offset < sizeof(struct brw_image_param)); 3561bf215546Sopenharmony_ci value = ((uint32_t *) param)[offset]; 3562bf215546Sopenharmony_ci#endif 3563bf215546Sopenharmony_ci } else if (sysval == BRW_PARAM_BUILTIN_ZERO) { 3564bf215546Sopenharmony_ci value = 0; 3565bf215546Sopenharmony_ci } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) { 3566bf215546Sopenharmony_ci int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval); 
3567bf215546Sopenharmony_ci int comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval); 3568bf215546Sopenharmony_ci value = fui(ice->state.clip_planes.ucp[plane][comp]); 3569bf215546Sopenharmony_ci } else if (sysval == BRW_PARAM_BUILTIN_PATCH_VERTICES_IN) { 3570bf215546Sopenharmony_ci if (stage == MESA_SHADER_TESS_CTRL) { 3571bf215546Sopenharmony_ci value = ice->state.vertices_per_patch; 3572bf215546Sopenharmony_ci } else { 3573bf215546Sopenharmony_ci assert(stage == MESA_SHADER_TESS_EVAL); 3574bf215546Sopenharmony_ci const struct shader_info *tcs_info = 3575bf215546Sopenharmony_ci crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL); 3576bf215546Sopenharmony_ci if (tcs_info) 3577bf215546Sopenharmony_ci value = tcs_info->tess.tcs_vertices_out; 3578bf215546Sopenharmony_ci else 3579bf215546Sopenharmony_ci value = ice->state.vertices_per_patch; 3580bf215546Sopenharmony_ci } 3581bf215546Sopenharmony_ci } else if (sysval >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X && 3582bf215546Sopenharmony_ci sysval <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) { 3583bf215546Sopenharmony_ci unsigned i = sysval - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; 3584bf215546Sopenharmony_ci value = fui(ice->state.default_outer_level[i]); 3585bf215546Sopenharmony_ci } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) { 3586bf215546Sopenharmony_ci value = fui(ice->state.default_inner_level[0]); 3587bf215546Sopenharmony_ci } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) { 3588bf215546Sopenharmony_ci value = fui(ice->state.default_inner_level[1]); 3589bf215546Sopenharmony_ci } else if (sysval >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X && 3590bf215546Sopenharmony_ci sysval <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) { 3591bf215546Sopenharmony_ci unsigned i = sysval - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X; 3592bf215546Sopenharmony_ci value = ice->state.last_block[i]; 3593bf215546Sopenharmony_ci } else { 3594bf215546Sopenharmony_ci assert(!"unhandled system value"); 3595bf215546Sopenharmony_ci } 
3596bf215546Sopenharmony_ci 3597bf215546Sopenharmony_ci *map++ = value; 3598bf215546Sopenharmony_ci } 3599bf215546Sopenharmony_ci 3600bf215546Sopenharmony_ci cbuf->buffer_size = upload_size; 3601bf215546Sopenharmony_ci shs->sysvals_need_upload = false; 3602bf215546Sopenharmony_ci} 3603bf215546Sopenharmony_ci 3604bf215546Sopenharmony_ci/** 3605bf215546Sopenharmony_ci * The pipe->set_shader_buffers() driver hook. 3606bf215546Sopenharmony_ci * 3607bf215546Sopenharmony_ci * This binds SSBOs and ABOs. Unfortunately, we need to stream out 3608bf215546Sopenharmony_ci * SURFACE_STATE here, as the buffer offset may change each time. 3609bf215546Sopenharmony_ci */ 3610bf215546Sopenharmony_cistatic void 3611bf215546Sopenharmony_cicrocus_set_shader_buffers(struct pipe_context *ctx, 3612bf215546Sopenharmony_ci enum pipe_shader_type p_stage, 3613bf215546Sopenharmony_ci unsigned start_slot, unsigned count, 3614bf215546Sopenharmony_ci const struct pipe_shader_buffer *buffers, 3615bf215546Sopenharmony_ci unsigned writable_bitmask) 3616bf215546Sopenharmony_ci{ 3617bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3618bf215546Sopenharmony_ci gl_shader_stage stage = stage_from_pipe(p_stage); 3619bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 3620bf215546Sopenharmony_ci 3621bf215546Sopenharmony_ci unsigned modified_bits = u_bit_consecutive(start_slot, count); 3622bf215546Sopenharmony_ci 3623bf215546Sopenharmony_ci shs->bound_ssbos &= ~modified_bits; 3624bf215546Sopenharmony_ci shs->writable_ssbos &= ~modified_bits; 3625bf215546Sopenharmony_ci shs->writable_ssbos |= writable_bitmask << start_slot; 3626bf215546Sopenharmony_ci 3627bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) { 3628bf215546Sopenharmony_ci if (buffers && buffers[i].buffer) { 3629bf215546Sopenharmony_ci struct crocus_resource *res = (void *) buffers[i].buffer; 3630bf215546Sopenharmony_ci struct pipe_shader_buffer *ssbo = 
&shs->ssbo[start_slot + i]; 3631bf215546Sopenharmony_ci pipe_resource_reference(&ssbo->buffer, &res->base.b); 3632bf215546Sopenharmony_ci ssbo->buffer_offset = buffers[i].buffer_offset; 3633bf215546Sopenharmony_ci ssbo->buffer_size = 3634bf215546Sopenharmony_ci MIN2(buffers[i].buffer_size, res->bo->size - ssbo->buffer_offset); 3635bf215546Sopenharmony_ci 3636bf215546Sopenharmony_ci shs->bound_ssbos |= 1 << (start_slot + i); 3637bf215546Sopenharmony_ci 3638bf215546Sopenharmony_ci res->bind_history |= PIPE_BIND_SHADER_BUFFER; 3639bf215546Sopenharmony_ci res->bind_stages |= 1 << stage; 3640bf215546Sopenharmony_ci 3641bf215546Sopenharmony_ci util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset, 3642bf215546Sopenharmony_ci ssbo->buffer_offset + ssbo->buffer_size); 3643bf215546Sopenharmony_ci } else { 3644bf215546Sopenharmony_ci pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL); 3645bf215546Sopenharmony_ci } 3646bf215546Sopenharmony_ci } 3647bf215546Sopenharmony_ci 3648bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage; 3649bf215546Sopenharmony_ci} 3650bf215546Sopenharmony_ci 3651bf215546Sopenharmony_cistatic void 3652bf215546Sopenharmony_cicrocus_delete_state(struct pipe_context *ctx, void *state) 3653bf215546Sopenharmony_ci{ 3654bf215546Sopenharmony_ci free(state); 3655bf215546Sopenharmony_ci} 3656bf215546Sopenharmony_ci 3657bf215546Sopenharmony_ci/** 3658bf215546Sopenharmony_ci * The pipe->set_vertex_buffers() driver hook. 3659bf215546Sopenharmony_ci * 3660bf215546Sopenharmony_ci * This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet. 
3661bf215546Sopenharmony_ci */ 3662bf215546Sopenharmony_cistatic void 3663bf215546Sopenharmony_cicrocus_set_vertex_buffers(struct pipe_context *ctx, 3664bf215546Sopenharmony_ci unsigned start_slot, unsigned count, 3665bf215546Sopenharmony_ci unsigned unbind_num_trailing_slots, 3666bf215546Sopenharmony_ci bool take_ownership, 3667bf215546Sopenharmony_ci const struct pipe_vertex_buffer *buffers) 3668bf215546Sopenharmony_ci{ 3669bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3670bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *) ctx->screen; 3671bf215546Sopenharmony_ci const unsigned padding = 3672bf215546Sopenharmony_ci (GFX_VERx10 < 75 && screen->devinfo.platform != INTEL_PLATFORM_BYT) * 2; 3673bf215546Sopenharmony_ci ice->state.bound_vertex_buffers &= 3674bf215546Sopenharmony_ci ~u_bit_consecutive64(start_slot, count + unbind_num_trailing_slots); 3675bf215546Sopenharmony_ci 3676bf215546Sopenharmony_ci util_set_vertex_buffers_mask(ice->state.vertex_buffers, &ice->state.bound_vertex_buffers, 3677bf215546Sopenharmony_ci buffers, start_slot, count, unbind_num_trailing_slots, 3678bf215546Sopenharmony_ci take_ownership); 3679bf215546Sopenharmony_ci 3680bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) { 3681bf215546Sopenharmony_ci struct pipe_vertex_buffer *state = 3682bf215546Sopenharmony_ci &ice->state.vertex_buffers[start_slot + i]; 3683bf215546Sopenharmony_ci 3684bf215546Sopenharmony_ci if (!state->is_user_buffer && state->buffer.resource) { 3685bf215546Sopenharmony_ci struct crocus_resource *res = (void *)state->buffer.resource; 3686bf215546Sopenharmony_ci res->bind_history |= PIPE_BIND_VERTEX_BUFFER; 3687bf215546Sopenharmony_ci } 3688bf215546Sopenharmony_ci 3689bf215546Sopenharmony_ci uint32_t end = 0; 3690bf215546Sopenharmony_ci if (state->buffer.resource) 3691bf215546Sopenharmony_ci end = state->buffer.resource->width0 + padding; 3692bf215546Sopenharmony_ci ice->state.vb_end[start_slot + 
i] = end; 3693bf215546Sopenharmony_ci } 3694bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS; 3695bf215546Sopenharmony_ci} 3696bf215546Sopenharmony_ci 3697bf215546Sopenharmony_ci#if GFX_VERx10 < 75 3698bf215546Sopenharmony_cistatic uint8_t get_wa_flags(enum isl_format format) 3699bf215546Sopenharmony_ci{ 3700bf215546Sopenharmony_ci uint8_t wa_flags = 0; 3701bf215546Sopenharmony_ci 3702bf215546Sopenharmony_ci switch (format) { 3703bf215546Sopenharmony_ci case ISL_FORMAT_R10G10B10A2_USCALED: 3704bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SCALE; 3705bf215546Sopenharmony_ci break; 3706bf215546Sopenharmony_ci case ISL_FORMAT_R10G10B10A2_SSCALED: 3707bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE; 3708bf215546Sopenharmony_ci break; 3709bf215546Sopenharmony_ci case ISL_FORMAT_R10G10B10A2_UNORM: 3710bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_NORMALIZE; 3711bf215546Sopenharmony_ci break; 3712bf215546Sopenharmony_ci case ISL_FORMAT_R10G10B10A2_SNORM: 3713bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE; 3714bf215546Sopenharmony_ci break; 3715bf215546Sopenharmony_ci case ISL_FORMAT_R10G10B10A2_SINT: 3716bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SIGN; 3717bf215546Sopenharmony_ci break; 3718bf215546Sopenharmony_ci case ISL_FORMAT_B10G10R10A2_USCALED: 3719bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA; 3720bf215546Sopenharmony_ci break; 3721bf215546Sopenharmony_ci case ISL_FORMAT_B10G10R10A2_SSCALED: 3722bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA; 3723bf215546Sopenharmony_ci break; 3724bf215546Sopenharmony_ci case ISL_FORMAT_B10G10R10A2_UNORM: 3725bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA; 3726bf215546Sopenharmony_ci break; 3727bf215546Sopenharmony_ci case ISL_FORMAT_B10G10R10A2_SNORM: 3728bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SIGN | 
BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA; 3729bf215546Sopenharmony_ci break; 3730bf215546Sopenharmony_ci case ISL_FORMAT_B10G10R10A2_SINT: 3731bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_BGRA; 3732bf215546Sopenharmony_ci break; 3733bf215546Sopenharmony_ci case ISL_FORMAT_B10G10R10A2_UINT: 3734bf215546Sopenharmony_ci wa_flags = BRW_ATTRIB_WA_BGRA; 3735bf215546Sopenharmony_ci break; 3736bf215546Sopenharmony_ci default: 3737bf215546Sopenharmony_ci break; 3738bf215546Sopenharmony_ci } 3739bf215546Sopenharmony_ci return wa_flags; 3740bf215546Sopenharmony_ci} 3741bf215546Sopenharmony_ci#endif 3742bf215546Sopenharmony_ci 3743bf215546Sopenharmony_ci/** 3744bf215546Sopenharmony_ci * Gallium CSO for vertex elements. 3745bf215546Sopenharmony_ci */ 3746bf215546Sopenharmony_cistruct crocus_vertex_element_state { 3747bf215546Sopenharmony_ci uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; 3748bf215546Sopenharmony_ci#if GFX_VER == 8 3749bf215546Sopenharmony_ci uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)]; 3750bf215546Sopenharmony_ci#endif 3751bf215546Sopenharmony_ci uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)]; 3752bf215546Sopenharmony_ci#if GFX_VER == 8 3753bf215546Sopenharmony_ci uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)]; 3754bf215546Sopenharmony_ci#endif 3755bf215546Sopenharmony_ci uint32_t step_rate[16]; 3756bf215546Sopenharmony_ci uint8_t wa_flags[33]; 3757bf215546Sopenharmony_ci unsigned count; 3758bf215546Sopenharmony_ci}; 3759bf215546Sopenharmony_ci 3760bf215546Sopenharmony_ci/** 3761bf215546Sopenharmony_ci * The pipe->create_vertex_elements() driver hook. 3762bf215546Sopenharmony_ci * 3763bf215546Sopenharmony_ci * This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS 3764bf215546Sopenharmony_ci * and 3DSTATE_VF_INSTANCING commands. 
The vertex_elements and vf_instancing 3765bf215546Sopenharmony_ci * arrays are ready to be emitted at draw time if no EdgeFlag or SGVs are 3766bf215546Sopenharmony_ci * needed. In these cases we will need information available at draw time. 3767bf215546Sopenharmony_ci * We setup edgeflag_ve and edgeflag_vfi as alternatives last 3768bf215546Sopenharmony_ci * 3DSTATE_VERTEX_ELEMENT and 3DSTATE_VF_INSTANCING that can be used at 3769bf215546Sopenharmony_ci * draw time if we detect that EdgeFlag is needed by the Vertex Shader. 3770bf215546Sopenharmony_ci */ 3771bf215546Sopenharmony_cistatic void * 3772bf215546Sopenharmony_cicrocus_create_vertex_elements(struct pipe_context *ctx, 3773bf215546Sopenharmony_ci unsigned count, 3774bf215546Sopenharmony_ci const struct pipe_vertex_element *state) 3775bf215546Sopenharmony_ci{ 3776bf215546Sopenharmony_ci struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 3777bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &screen->devinfo; 3778bf215546Sopenharmony_ci struct crocus_vertex_element_state *cso = 3779bf215546Sopenharmony_ci malloc(sizeof(struct crocus_vertex_element_state)); 3780bf215546Sopenharmony_ci 3781bf215546Sopenharmony_ci cso->count = count; 3782bf215546Sopenharmony_ci 3783bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) { 3784bf215546Sopenharmony_ci ve.DWordLength = 3785bf215546Sopenharmony_ci 1 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2; 3786bf215546Sopenharmony_ci } 3787bf215546Sopenharmony_ci 3788bf215546Sopenharmony_ci uint32_t *ve_pack_dest = &cso->vertex_elements[1]; 3789bf215546Sopenharmony_ci#if GFX_VER == 8 3790bf215546Sopenharmony_ci uint32_t *vfi_pack_dest = cso->vf_instancing; 3791bf215546Sopenharmony_ci#endif 3792bf215546Sopenharmony_ci 3793bf215546Sopenharmony_ci if (count == 0) { 3794bf215546Sopenharmony_ci crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 3795bf215546Sopenharmony_ci ve.Valid = 
true; 3796bf215546Sopenharmony_ci ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; 3797bf215546Sopenharmony_ci ve.Component0Control = VFCOMP_STORE_0; 3798bf215546Sopenharmony_ci ve.Component1Control = VFCOMP_STORE_0; 3799bf215546Sopenharmony_ci ve.Component2Control = VFCOMP_STORE_0; 3800bf215546Sopenharmony_ci ve.Component3Control = VFCOMP_STORE_1_FP; 3801bf215546Sopenharmony_ci } 3802bf215546Sopenharmony_ci#if GFX_VER == 8 3803bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { 3804bf215546Sopenharmony_ci } 3805bf215546Sopenharmony_ci#endif 3806bf215546Sopenharmony_ci } 3807bf215546Sopenharmony_ci 3808bf215546Sopenharmony_ci for (int i = 0; i < count; i++) { 3809bf215546Sopenharmony_ci const struct crocus_format_info fmt = 3810bf215546Sopenharmony_ci crocus_format_for_usage(devinfo, state[i].src_format, 0); 3811bf215546Sopenharmony_ci unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC, 3812bf215546Sopenharmony_ci VFCOMP_STORE_SRC, VFCOMP_STORE_SRC }; 3813bf215546Sopenharmony_ci enum isl_format actual_fmt = fmt.fmt; 3814bf215546Sopenharmony_ci 3815bf215546Sopenharmony_ci#if GFX_VERx10 < 75 3816bf215546Sopenharmony_ci cso->wa_flags[i] = get_wa_flags(fmt.fmt); 3817bf215546Sopenharmony_ci 3818bf215546Sopenharmony_ci if (fmt.fmt == ISL_FORMAT_R10G10B10A2_USCALED || 3819bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_R10G10B10A2_SSCALED || 3820bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_R10G10B10A2_UNORM || 3821bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_R10G10B10A2_SNORM || 3822bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_R10G10B10A2_SINT || 3823bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_B10G10R10A2_USCALED || 3824bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_B10G10R10A2_SSCALED || 3825bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_B10G10R10A2_UNORM || 3826bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_B10G10R10A2_SNORM || 3827bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_B10G10R10A2_UINT || 
3828bf215546Sopenharmony_ci fmt.fmt == ISL_FORMAT_B10G10R10A2_SINT) 3829bf215546Sopenharmony_ci actual_fmt = ISL_FORMAT_R10G10B10A2_UINT; 3830bf215546Sopenharmony_ci if (fmt.fmt == ISL_FORMAT_R8G8B8_SINT) 3831bf215546Sopenharmony_ci actual_fmt = ISL_FORMAT_R8G8B8A8_SINT; 3832bf215546Sopenharmony_ci if (fmt.fmt == ISL_FORMAT_R8G8B8_UINT) 3833bf215546Sopenharmony_ci actual_fmt = ISL_FORMAT_R8G8B8A8_UINT; 3834bf215546Sopenharmony_ci if (fmt.fmt == ISL_FORMAT_R16G16B16_SINT) 3835bf215546Sopenharmony_ci actual_fmt = ISL_FORMAT_R16G16B16A16_SINT; 3836bf215546Sopenharmony_ci if (fmt.fmt == ISL_FORMAT_R16G16B16_UINT) 3837bf215546Sopenharmony_ci actual_fmt = ISL_FORMAT_R16G16B16A16_UINT; 3838bf215546Sopenharmony_ci#endif 3839bf215546Sopenharmony_ci 3840bf215546Sopenharmony_ci cso->step_rate[state[i].vertex_buffer_index] = state[i].instance_divisor; 3841bf215546Sopenharmony_ci 3842bf215546Sopenharmony_ci switch (isl_format_get_num_channels(fmt.fmt)) { 3843bf215546Sopenharmony_ci case 0: comp[0] = VFCOMP_STORE_0; FALLTHROUGH; 3844bf215546Sopenharmony_ci case 1: comp[1] = VFCOMP_STORE_0; FALLTHROUGH; 3845bf215546Sopenharmony_ci case 2: comp[2] = VFCOMP_STORE_0; FALLTHROUGH; 3846bf215546Sopenharmony_ci case 3: 3847bf215546Sopenharmony_ci comp[3] = isl_format_has_int_channel(fmt.fmt) ? 
VFCOMP_STORE_1_INT 3848bf215546Sopenharmony_ci : VFCOMP_STORE_1_FP; 3849bf215546Sopenharmony_ci break; 3850bf215546Sopenharmony_ci } 3851bf215546Sopenharmony_ci crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 3852bf215546Sopenharmony_ci#if GFX_VER >= 6 3853bf215546Sopenharmony_ci ve.EdgeFlagEnable = false; 3854bf215546Sopenharmony_ci#endif 3855bf215546Sopenharmony_ci ve.VertexBufferIndex = state[i].vertex_buffer_index; 3856bf215546Sopenharmony_ci ve.Valid = true; 3857bf215546Sopenharmony_ci ve.SourceElementOffset = state[i].src_offset; 3858bf215546Sopenharmony_ci ve.SourceElementFormat = actual_fmt; 3859bf215546Sopenharmony_ci ve.Component0Control = comp[0]; 3860bf215546Sopenharmony_ci ve.Component1Control = comp[1]; 3861bf215546Sopenharmony_ci ve.Component2Control = comp[2]; 3862bf215546Sopenharmony_ci ve.Component3Control = comp[3]; 3863bf215546Sopenharmony_ci#if GFX_VER < 5 3864bf215546Sopenharmony_ci ve.DestinationElementOffset = i * 4; 3865bf215546Sopenharmony_ci#endif 3866bf215546Sopenharmony_ci } 3867bf215546Sopenharmony_ci 3868bf215546Sopenharmony_ci#if GFX_VER == 8 3869bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { 3870bf215546Sopenharmony_ci vi.VertexElementIndex = i; 3871bf215546Sopenharmony_ci vi.InstancingEnable = state[i].instance_divisor > 0; 3872bf215546Sopenharmony_ci vi.InstanceDataStepRate = state[i].instance_divisor; 3873bf215546Sopenharmony_ci } 3874bf215546Sopenharmony_ci#endif 3875bf215546Sopenharmony_ci ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 3876bf215546Sopenharmony_ci#if GFX_VER == 8 3877bf215546Sopenharmony_ci vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length); 3878bf215546Sopenharmony_ci#endif 3879bf215546Sopenharmony_ci } 3880bf215546Sopenharmony_ci 3881bf215546Sopenharmony_ci /* An alternative version of the last VE and VFI is stored so it 3882bf215546Sopenharmony_ci * can be used at draw time in case Vertex Shader uses EdgeFlag 3883bf215546Sopenharmony_ci */ 
3884bf215546Sopenharmony_ci if (count) { 3885bf215546Sopenharmony_ci const unsigned edgeflag_index = count - 1; 3886bf215546Sopenharmony_ci const struct crocus_format_info fmt = 3887bf215546Sopenharmony_ci crocus_format_for_usage(devinfo, state[edgeflag_index].src_format, 0); 3888bf215546Sopenharmony_ci crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), cso->edgeflag_ve, ve) { 3889bf215546Sopenharmony_ci#if GFX_VER >= 6 3890bf215546Sopenharmony_ci ve.EdgeFlagEnable = true; 3891bf215546Sopenharmony_ci#endif 3892bf215546Sopenharmony_ci ve.VertexBufferIndex = state[edgeflag_index].vertex_buffer_index; 3893bf215546Sopenharmony_ci ve.Valid = true; 3894bf215546Sopenharmony_ci ve.SourceElementOffset = state[edgeflag_index].src_offset; 3895bf215546Sopenharmony_ci ve.SourceElementFormat = fmt.fmt; 3896bf215546Sopenharmony_ci ve.Component0Control = VFCOMP_STORE_SRC; 3897bf215546Sopenharmony_ci ve.Component1Control = VFCOMP_STORE_0; 3898bf215546Sopenharmony_ci ve.Component2Control = VFCOMP_STORE_0; 3899bf215546Sopenharmony_ci ve.Component3Control = VFCOMP_STORE_0; 3900bf215546Sopenharmony_ci } 3901bf215546Sopenharmony_ci#if GFX_VER == 8 3902bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->edgeflag_vfi, vi) { 3903bf215546Sopenharmony_ci /* The vi.VertexElementIndex of the EdgeFlag Vertex Element is filled 3904bf215546Sopenharmony_ci * at draw time, as it should change if SGVs are emitted. 3905bf215546Sopenharmony_ci */ 3906bf215546Sopenharmony_ci vi.InstancingEnable = state[edgeflag_index].instance_divisor > 0; 3907bf215546Sopenharmony_ci vi.InstanceDataStepRate = state[edgeflag_index].instance_divisor; 3908bf215546Sopenharmony_ci } 3909bf215546Sopenharmony_ci#endif 3910bf215546Sopenharmony_ci } 3911bf215546Sopenharmony_ci 3912bf215546Sopenharmony_ci return cso; 3913bf215546Sopenharmony_ci} 3914bf215546Sopenharmony_ci 3915bf215546Sopenharmony_ci/** 3916bf215546Sopenharmony_ci * The pipe->bind_vertex_elements_state() driver hook. 
3917bf215546Sopenharmony_ci */ 3918bf215546Sopenharmony_cistatic void 3919bf215546Sopenharmony_cicrocus_bind_vertex_elements_state(struct pipe_context *ctx, void *state) 3920bf215546Sopenharmony_ci{ 3921bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 3922bf215546Sopenharmony_ci#if GFX_VER == 8 3923bf215546Sopenharmony_ci struct crocus_vertex_element_state *old_cso = ice->state.cso_vertex_elements; 3924bf215546Sopenharmony_ci struct crocus_vertex_element_state *new_cso = state; 3925bf215546Sopenharmony_ci 3926bf215546Sopenharmony_ci if (new_cso && cso_changed(count)) 3927bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS; 3928bf215546Sopenharmony_ci#endif 3929bf215546Sopenharmony_ci ice->state.cso_vertex_elements = state; 3930bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS; 3931bf215546Sopenharmony_ci ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_VERTEX_ELEMENTS]; 3932bf215546Sopenharmony_ci} 3933bf215546Sopenharmony_ci 3934bf215546Sopenharmony_ci#if GFX_VER >= 6 3935bf215546Sopenharmony_cistruct crocus_streamout_counter { 3936bf215546Sopenharmony_ci uint32_t offset_start; 3937bf215546Sopenharmony_ci uint32_t offset_end; 3938bf215546Sopenharmony_ci 3939bf215546Sopenharmony_ci uint64_t accum; 3940bf215546Sopenharmony_ci}; 3941bf215546Sopenharmony_ci 3942bf215546Sopenharmony_ci/** 3943bf215546Sopenharmony_ci * Gallium CSO for stream output (transform feedback) targets. 3944bf215546Sopenharmony_ci */ 3945bf215546Sopenharmony_cistruct crocus_stream_output_target { 3946bf215546Sopenharmony_ci struct pipe_stream_output_target base; 3947bf215546Sopenharmony_ci 3948bf215546Sopenharmony_ci /** Stride (bytes-per-vertex) during this transform feedback operation */ 3949bf215546Sopenharmony_ci uint16_t stride; 3950bf215546Sopenharmony_ci 3951bf215546Sopenharmony_ci /** Has 3DSTATE_SO_BUFFER actually been emitted, zeroing the offsets? 
*/ 3952bf215546Sopenharmony_ci bool zeroed; 3953bf215546Sopenharmony_ci 3954bf215546Sopenharmony_ci struct crocus_resource *offset_res; 3955bf215546Sopenharmony_ci uint32_t offset_offset; 3956bf215546Sopenharmony_ci 3957bf215546Sopenharmony_ci#if GFX_VER == 6 3958bf215546Sopenharmony_ci void *prim_map; 3959bf215546Sopenharmony_ci struct crocus_streamout_counter prev_count; 3960bf215546Sopenharmony_ci struct crocus_streamout_counter count; 3961bf215546Sopenharmony_ci#endif 3962bf215546Sopenharmony_ci#if GFX_VER == 8 3963bf215546Sopenharmony_ci /** Does the next 3DSTATE_SO_BUFFER need to zero the offsets? */ 3964bf215546Sopenharmony_ci bool zero_offset; 3965bf215546Sopenharmony_ci#endif 3966bf215546Sopenharmony_ci}; 3967bf215546Sopenharmony_ci 3968bf215546Sopenharmony_ci#if GFX_VER >= 7 3969bf215546Sopenharmony_cistatic uint32_t 3970bf215546Sopenharmony_cicrocus_get_so_offset(struct pipe_stream_output_target *so) 3971bf215546Sopenharmony_ci{ 3972bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = (void *)so; 3973bf215546Sopenharmony_ci struct pipe_transfer *transfer; 3974bf215546Sopenharmony_ci struct pipe_box box; 3975bf215546Sopenharmony_ci uint32_t result; 3976bf215546Sopenharmony_ci u_box_1d(tgt->offset_offset, 4, &box); 3977bf215546Sopenharmony_ci void *val = so->context->buffer_map(so->context, &tgt->offset_res->base.b, 3978bf215546Sopenharmony_ci 0, PIPE_MAP_DIRECTLY, 3979bf215546Sopenharmony_ci &box, &transfer); 3980bf215546Sopenharmony_ci assert(val); 3981bf215546Sopenharmony_ci result = *(uint32_t *)val; 3982bf215546Sopenharmony_ci so->context->buffer_unmap(so->context, transfer); 3983bf215546Sopenharmony_ci 3984bf215546Sopenharmony_ci return result / tgt->stride; 3985bf215546Sopenharmony_ci} 3986bf215546Sopenharmony_ci#endif 3987bf215546Sopenharmony_ci 3988bf215546Sopenharmony_ci#if GFX_VER == 6 3989bf215546Sopenharmony_cistatic void 3990bf215546Sopenharmony_cicompute_vertices_written_so_far(struct crocus_context *ice, 
3991bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt, 3992bf215546Sopenharmony_ci struct crocus_streamout_counter *count, 3993bf215546Sopenharmony_ci uint64_t *svbi); 3994bf215546Sopenharmony_ci 3995bf215546Sopenharmony_cistatic uint32_t 3996bf215546Sopenharmony_cicrocus_get_so_offset(struct pipe_stream_output_target *so) 3997bf215546Sopenharmony_ci{ 3998bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = (void *)so; 3999bf215546Sopenharmony_ci struct crocus_context *ice = (void *)so->context; 4000bf215546Sopenharmony_ci 4001bf215546Sopenharmony_ci uint64_t vert_written; 4002bf215546Sopenharmony_ci compute_vertices_written_so_far(ice, tgt, &tgt->prev_count, &vert_written); 4003bf215546Sopenharmony_ci return vert_written; 4004bf215546Sopenharmony_ci} 4005bf215546Sopenharmony_ci#endif 4006bf215546Sopenharmony_ci 4007bf215546Sopenharmony_ci/** 4008bf215546Sopenharmony_ci * The pipe->create_stream_output_target() driver hook. 4009bf215546Sopenharmony_ci * 4010bf215546Sopenharmony_ci * "Target" here refers to a destination buffer. We translate this into 4011bf215546Sopenharmony_ci * a 3DSTATE_SO_BUFFER packet. We can handle most fields, but don't yet 4012bf215546Sopenharmony_ci * know which buffer this represents, or whether we ought to zero the 4013bf215546Sopenharmony_ci * write-offsets, or append. Those are handled in the set() hook. 
4014bf215546Sopenharmony_ci */ 4015bf215546Sopenharmony_cistatic struct pipe_stream_output_target * 4016bf215546Sopenharmony_cicrocus_create_stream_output_target(struct pipe_context *ctx, 4017bf215546Sopenharmony_ci struct pipe_resource *p_res, 4018bf215546Sopenharmony_ci unsigned buffer_offset, 4019bf215546Sopenharmony_ci unsigned buffer_size) 4020bf215546Sopenharmony_ci{ 4021bf215546Sopenharmony_ci struct crocus_resource *res = (void *) p_res; 4022bf215546Sopenharmony_ci struct crocus_stream_output_target *cso = calloc(1, sizeof(*cso)); 4023bf215546Sopenharmony_ci if (!cso) 4024bf215546Sopenharmony_ci return NULL; 4025bf215546Sopenharmony_ci 4026bf215546Sopenharmony_ci res->bind_history |= PIPE_BIND_STREAM_OUTPUT; 4027bf215546Sopenharmony_ci 4028bf215546Sopenharmony_ci pipe_reference_init(&cso->base.reference, 1); 4029bf215546Sopenharmony_ci pipe_resource_reference(&cso->base.buffer, p_res); 4030bf215546Sopenharmony_ci cso->base.buffer_offset = buffer_offset; 4031bf215546Sopenharmony_ci cso->base.buffer_size = buffer_size; 4032bf215546Sopenharmony_ci cso->base.context = ctx; 4033bf215546Sopenharmony_ci 4034bf215546Sopenharmony_ci util_range_add(&res->base.b, &res->valid_buffer_range, buffer_offset, 4035bf215546Sopenharmony_ci buffer_offset + buffer_size); 4036bf215546Sopenharmony_ci#if GFX_VER >= 7 4037bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 4038bf215546Sopenharmony_ci void *temp; 4039bf215546Sopenharmony_ci u_upload_alloc(ice->ctx.stream_uploader, 0, sizeof(uint32_t), 4, 4040bf215546Sopenharmony_ci &cso->offset_offset, 4041bf215546Sopenharmony_ci (struct pipe_resource **)&cso->offset_res, 4042bf215546Sopenharmony_ci &temp); 4043bf215546Sopenharmony_ci#endif 4044bf215546Sopenharmony_ci 4045bf215546Sopenharmony_ci return &cso->base; 4046bf215546Sopenharmony_ci} 4047bf215546Sopenharmony_ci 4048bf215546Sopenharmony_cistatic void 4049bf215546Sopenharmony_cicrocus_stream_output_target_destroy(struct pipe_context *ctx, 
4050bf215546Sopenharmony_ci struct pipe_stream_output_target *state) 4051bf215546Sopenharmony_ci{ 4052bf215546Sopenharmony_ci struct crocus_stream_output_target *cso = (void *) state; 4053bf215546Sopenharmony_ci 4054bf215546Sopenharmony_ci pipe_resource_reference((struct pipe_resource **)&cso->offset_res, NULL); 4055bf215546Sopenharmony_ci pipe_resource_reference(&cso->base.buffer, NULL); 4056bf215546Sopenharmony_ci 4057bf215546Sopenharmony_ci free(cso); 4058bf215546Sopenharmony_ci} 4059bf215546Sopenharmony_ci 4060bf215546Sopenharmony_ci#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288 4061bf215546Sopenharmony_ci#define GEN7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4) 4062bf215546Sopenharmony_ci 4063bf215546Sopenharmony_ci#if GFX_VER == 6 4064bf215546Sopenharmony_cistatic void 4065bf215546Sopenharmony_ciaggregate_stream_counter(struct crocus_batch *batch, struct crocus_stream_output_target *tgt, 4066bf215546Sopenharmony_ci struct crocus_streamout_counter *counter) 4067bf215546Sopenharmony_ci{ 4068bf215546Sopenharmony_ci uint64_t *prim_counts = tgt->prim_map; 4069bf215546Sopenharmony_ci 4070bf215546Sopenharmony_ci if (crocus_batch_references(batch, tgt->offset_res->bo)) { 4071bf215546Sopenharmony_ci struct pipe_fence_handle *out_fence = NULL; 4072bf215546Sopenharmony_ci batch->ice->ctx.flush(&batch->ice->ctx, &out_fence, 0); 4073bf215546Sopenharmony_ci batch->screen->base.fence_finish(&batch->screen->base, &batch->ice->ctx, out_fence, UINT64_MAX); 4074bf215546Sopenharmony_ci batch->screen->base.fence_reference(&batch->screen->base, &out_fence, NULL); 4075bf215546Sopenharmony_ci } 4076bf215546Sopenharmony_ci 4077bf215546Sopenharmony_ci for (unsigned i = counter->offset_start / sizeof(uint64_t); i < counter->offset_end / sizeof(uint64_t); i += 2) { 4078bf215546Sopenharmony_ci counter->accum += prim_counts[i + 1] - prim_counts[i]; 4079bf215546Sopenharmony_ci } 4080bf215546Sopenharmony_ci tgt->count.offset_start = tgt->count.offset_end = 0; 4081bf215546Sopenharmony_ci} 
4082bf215546Sopenharmony_ci 4083bf215546Sopenharmony_cistatic void 4084bf215546Sopenharmony_cicrocus_stream_store_prims_written(struct crocus_batch *batch, 4085bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt) 4086bf215546Sopenharmony_ci{ 4087bf215546Sopenharmony_ci if (!tgt->offset_res) { 4088bf215546Sopenharmony_ci u_upload_alloc(batch->ice->ctx.stream_uploader, 0, 4096, 4, 4089bf215546Sopenharmony_ci &tgt->offset_offset, 4090bf215546Sopenharmony_ci (struct pipe_resource **)&tgt->offset_res, 4091bf215546Sopenharmony_ci &tgt->prim_map); 4092bf215546Sopenharmony_ci tgt->count.offset_start = tgt->count.offset_end = 0; 4093bf215546Sopenharmony_ci } 4094bf215546Sopenharmony_ci 4095bf215546Sopenharmony_ci if (tgt->count.offset_end + 16 >= 4096) { 4096bf215546Sopenharmony_ci aggregate_stream_counter(batch, tgt, &tgt->prev_count); 4097bf215546Sopenharmony_ci aggregate_stream_counter(batch, tgt, &tgt->count); 4098bf215546Sopenharmony_ci } 4099bf215546Sopenharmony_ci 4100bf215546Sopenharmony_ci crocus_emit_mi_flush(batch); 4101bf215546Sopenharmony_ci crocus_store_register_mem64(batch, GEN6_SO_NUM_PRIMS_WRITTEN, 4102bf215546Sopenharmony_ci tgt->offset_res->bo, 4103bf215546Sopenharmony_ci tgt->count.offset_end + tgt->offset_offset, false); 4104bf215546Sopenharmony_ci tgt->count.offset_end += 8; 4105bf215546Sopenharmony_ci} 4106bf215546Sopenharmony_ci 4107bf215546Sopenharmony_cistatic void 4108bf215546Sopenharmony_cicompute_vertices_written_so_far(struct crocus_context *ice, 4109bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt, 4110bf215546Sopenharmony_ci struct crocus_streamout_counter *counter, 4111bf215546Sopenharmony_ci uint64_t *svbi) 4112bf215546Sopenharmony_ci{ 4113bf215546Sopenharmony_ci //TODO vertices per prim 4114bf215546Sopenharmony_ci aggregate_stream_counter(&ice->batches[0], tgt, counter); 4115bf215546Sopenharmony_ci 4116bf215546Sopenharmony_ci *svbi = counter->accum * ice->state.last_xfb_verts_per_prim; 
4117bf215546Sopenharmony_ci} 4118bf215546Sopenharmony_ci#endif 4119bf215546Sopenharmony_ci/** 4120bf215546Sopenharmony_ci * The pipe->set_stream_output_targets() driver hook. 4121bf215546Sopenharmony_ci * 4122bf215546Sopenharmony_ci * At this point, we know which targets are bound to a particular index, 4123bf215546Sopenharmony_ci * and also whether we want to append or start over. We can finish the 4124bf215546Sopenharmony_ci * 3DSTATE_SO_BUFFER packets we started earlier. 4125bf215546Sopenharmony_ci */ 4126bf215546Sopenharmony_cistatic void 4127bf215546Sopenharmony_cicrocus_set_stream_output_targets(struct pipe_context *ctx, 4128bf215546Sopenharmony_ci unsigned num_targets, 4129bf215546Sopenharmony_ci struct pipe_stream_output_target **targets, 4130bf215546Sopenharmony_ci const unsigned *offsets) 4131bf215546Sopenharmony_ci{ 4132bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 4133bf215546Sopenharmony_ci struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; 4134bf215546Sopenharmony_ci struct pipe_stream_output_target *old_tgt[4] = { NULL, NULL, NULL, NULL }; 4135bf215546Sopenharmony_ci const bool active = num_targets > 0; 4136bf215546Sopenharmony_ci if (ice->state.streamout_active != active) { 4137bf215546Sopenharmony_ci ice->state.streamout_active = active; 4138bf215546Sopenharmony_ci#if GFX_VER >= 7 4139bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_STREAMOUT; 4140bf215546Sopenharmony_ci#else 4141bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG; 4142bf215546Sopenharmony_ci#endif 4143bf215546Sopenharmony_ci 4144bf215546Sopenharmony_ci /* We only emit 3DSTATE_SO_DECL_LIST when streamout is active, because 4145bf215546Sopenharmony_ci * it's a non-pipelined command. If we're switching streamout on, we 4146bf215546Sopenharmony_ci * may have missed emitting it earlier, so do so now. (We're already 4147bf215546Sopenharmony_ci * taking a stall to update 3DSTATE_SO_BUFFERS anyway...) 
4148bf215546Sopenharmony_ci */ 4149bf215546Sopenharmony_ci if (active) { 4150bf215546Sopenharmony_ci#if GFX_VER >= 7 4151bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST; 4152bf215546Sopenharmony_ci#endif 4153bf215546Sopenharmony_ci } else { 4154bf215546Sopenharmony_ci uint32_t flush = 0; 4155bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 4156bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = 4157bf215546Sopenharmony_ci (void *) ice->state.so_target[i]; 4158bf215546Sopenharmony_ci if (tgt) { 4159bf215546Sopenharmony_ci struct crocus_resource *res = (void *) tgt->base.buffer; 4160bf215546Sopenharmony_ci 4161bf215546Sopenharmony_ci flush |= crocus_flush_bits_for_history(res); 4162bf215546Sopenharmony_ci crocus_dirty_for_history(ice, res); 4163bf215546Sopenharmony_ci } 4164bf215546Sopenharmony_ci } 4165bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER], 4166bf215546Sopenharmony_ci "make streamout results visible", flush); 4167bf215546Sopenharmony_ci } 4168bf215546Sopenharmony_ci } 4169bf215546Sopenharmony_ci 4170bf215546Sopenharmony_ci ice->state.so_targets = num_targets; 4171bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 4172bf215546Sopenharmony_ci pipe_so_target_reference(&old_tgt[i], ice->state.so_target[i]); 4173bf215546Sopenharmony_ci pipe_so_target_reference(&ice->state.so_target[i], 4174bf215546Sopenharmony_ci i < num_targets ? 
targets[i] : NULL); 4175bf215546Sopenharmony_ci } 4176bf215546Sopenharmony_ci 4177bf215546Sopenharmony_ci#if GFX_VER == 6 4178bf215546Sopenharmony_ci bool stored_num_prims = false; 4179bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 4180bf215546Sopenharmony_ci if (num_targets) { 4181bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = 4182bf215546Sopenharmony_ci (void *) ice->state.so_target[i]; 4183bf215546Sopenharmony_ci 4184bf215546Sopenharmony_ci if (!tgt) 4185bf215546Sopenharmony_ci continue; 4186bf215546Sopenharmony_ci if (offsets[i] == 0) { 4187bf215546Sopenharmony_ci // This means that we're supposed to ignore anything written to 4188bf215546Sopenharmony_ci // the buffer before. We can do this by just clearing out the 4189bf215546Sopenharmony_ci // count of writes to the prim count buffer. 4190bf215546Sopenharmony_ci tgt->count.offset_start = tgt->count.offset_end; 4191bf215546Sopenharmony_ci tgt->count.accum = 0; 4192bf215546Sopenharmony_ci ice->state.svbi = 0; 4193bf215546Sopenharmony_ci } else { 4194bf215546Sopenharmony_ci if (tgt->offset_res) { 4195bf215546Sopenharmony_ci compute_vertices_written_so_far(ice, tgt, &tgt->count, &ice->state.svbi); 4196bf215546Sopenharmony_ci tgt->count.offset_start = tgt->count.offset_end; 4197bf215546Sopenharmony_ci } 4198bf215546Sopenharmony_ci } 4199bf215546Sopenharmony_ci 4200bf215546Sopenharmony_ci if (!stored_num_prims) { 4201bf215546Sopenharmony_ci crocus_stream_store_prims_written(batch, tgt); 4202bf215546Sopenharmony_ci stored_num_prims = true; 4203bf215546Sopenharmony_ci } 4204bf215546Sopenharmony_ci } else { 4205bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = 4206bf215546Sopenharmony_ci (void *) old_tgt[i]; 4207bf215546Sopenharmony_ci if (tgt) { 4208bf215546Sopenharmony_ci if (!stored_num_prims) { 4209bf215546Sopenharmony_ci crocus_stream_store_prims_written(batch, tgt); 4210bf215546Sopenharmony_ci stored_num_prims = true; 4211bf215546Sopenharmony_ci } 
4212bf215546Sopenharmony_ci 4213bf215546Sopenharmony_ci if (tgt->offset_res) { 4214bf215546Sopenharmony_ci tgt->prev_count = tgt->count; 4215bf215546Sopenharmony_ci } 4216bf215546Sopenharmony_ci } 4217bf215546Sopenharmony_ci } 4218bf215546Sopenharmony_ci pipe_so_target_reference(&old_tgt[i], NULL); 4219bf215546Sopenharmony_ci } 4220bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS; 4221bf215546Sopenharmony_ci#else 4222bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 4223bf215546Sopenharmony_ci if (num_targets) { 4224bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = 4225bf215546Sopenharmony_ci (void *) ice->state.so_target[i]; 4226bf215546Sopenharmony_ci 4227bf215546Sopenharmony_ci if (offsets[i] == 0) { 4228bf215546Sopenharmony_ci#if GFX_VER == 8 4229bf215546Sopenharmony_ci if (tgt) 4230bf215546Sopenharmony_ci tgt->zero_offset = true; 4231bf215546Sopenharmony_ci#endif 4232bf215546Sopenharmony_ci crocus_load_register_imm32(batch, GEN7_SO_WRITE_OFFSET(i), 0); 4233bf215546Sopenharmony_ci } 4234bf215546Sopenharmony_ci else if (tgt) 4235bf215546Sopenharmony_ci crocus_load_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i), 4236bf215546Sopenharmony_ci tgt->offset_res->bo, 4237bf215546Sopenharmony_ci tgt->offset_offset); 4238bf215546Sopenharmony_ci } else { 4239bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = 4240bf215546Sopenharmony_ci (void *) old_tgt[i]; 4241bf215546Sopenharmony_ci if (tgt) 4242bf215546Sopenharmony_ci crocus_store_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i), 4243bf215546Sopenharmony_ci tgt->offset_res->bo, 4244bf215546Sopenharmony_ci tgt->offset_offset, false); 4245bf215546Sopenharmony_ci } 4246bf215546Sopenharmony_ci pipe_so_target_reference(&old_tgt[i], NULL); 4247bf215546Sopenharmony_ci } 4248bf215546Sopenharmony_ci#endif 4249bf215546Sopenharmony_ci /* No need to update 3DSTATE_SO_BUFFER unless SOL is active. 
*/ 4250bf215546Sopenharmony_ci if (!active) 4251bf215546Sopenharmony_ci return; 4252bf215546Sopenharmony_ci#if GFX_VER >= 7 4253bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS; 4254bf215546Sopenharmony_ci#elif GFX_VER == 6 4255bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN6_SVBI; 4256bf215546Sopenharmony_ci#endif 4257bf215546Sopenharmony_ci} 4258bf215546Sopenharmony_ci 4259bf215546Sopenharmony_ci#endif 4260bf215546Sopenharmony_ci 4261bf215546Sopenharmony_ci#if GFX_VER >= 7 4262bf215546Sopenharmony_ci/** 4263bf215546Sopenharmony_ci * An crocus-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and 4264bf215546Sopenharmony_ci * 3DSTATE_STREAMOUT packets. 4265bf215546Sopenharmony_ci * 4266bf215546Sopenharmony_ci * 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout 4267bf215546Sopenharmony_ci * hardware to record. We can create it entirely based on the shader, with 4268bf215546Sopenharmony_ci * no dynamic state dependencies. 4269bf215546Sopenharmony_ci * 4270bf215546Sopenharmony_ci * 3DSTATE_STREAMOUT is an annoying mix of shader-based information and 4271bf215546Sopenharmony_ci * state-based settings. We capture the shader-related ones here, and merge 4272bf215546Sopenharmony_ci * the rest in at draw time. 
4273bf215546Sopenharmony_ci */ 4274bf215546Sopenharmony_cistatic uint32_t * 4275bf215546Sopenharmony_cicrocus_create_so_decl_list(const struct pipe_stream_output_info *info, 4276bf215546Sopenharmony_ci const struct brw_vue_map *vue_map) 4277bf215546Sopenharmony_ci{ 4278bf215546Sopenharmony_ci struct GENX(SO_DECL) so_decl[PIPE_MAX_VERTEX_STREAMS][128]; 4279bf215546Sopenharmony_ci int buffer_mask[PIPE_MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 4280bf215546Sopenharmony_ci int next_offset[PIPE_MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 4281bf215546Sopenharmony_ci int decls[PIPE_MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; 4282bf215546Sopenharmony_ci int max_decls = 0; 4283bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= PIPE_MAX_SO_OUTPUTS); 4284bf215546Sopenharmony_ci 4285bf215546Sopenharmony_ci memset(so_decl, 0, sizeof(so_decl)); 4286bf215546Sopenharmony_ci 4287bf215546Sopenharmony_ci /* Construct the list of SO_DECLs to be emitted. The formatting of the 4288bf215546Sopenharmony_ci * command feels strange -- each dword pair contains a SO_DECL per stream. 4289bf215546Sopenharmony_ci */ 4290bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_outputs; i++) { 4291bf215546Sopenharmony_ci const struct pipe_stream_output *output = &info->output[i]; 4292bf215546Sopenharmony_ci const int buffer = output->output_buffer; 4293bf215546Sopenharmony_ci const int varying = output->register_index; 4294bf215546Sopenharmony_ci const unsigned stream_id = output->stream; 4295bf215546Sopenharmony_ci assert(stream_id < PIPE_MAX_VERTEX_STREAMS); 4296bf215546Sopenharmony_ci 4297bf215546Sopenharmony_ci buffer_mask[stream_id] |= 1 << buffer; 4298bf215546Sopenharmony_ci 4299bf215546Sopenharmony_ci assert(vue_map->varying_to_slot[varying] >= 0); 4300bf215546Sopenharmony_ci 4301bf215546Sopenharmony_ci /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[] 4302bf215546Sopenharmony_ci * array. 
Instead, it simply increments DstOffset for the following 4303bf215546Sopenharmony_ci * input by the number of components that should be skipped. 4304bf215546Sopenharmony_ci * 4305bf215546Sopenharmony_ci * Our hardware is unusual in that it requires us to program SO_DECLs 4306bf215546Sopenharmony_ci * for fake "hole" components, rather than simply taking the offset 4307bf215546Sopenharmony_ci * for each real varying. Each hole can have size 1, 2, 3, or 4; we 4308bf215546Sopenharmony_ci * program as many size = 4 holes as we can, then a final hole to 4309bf215546Sopenharmony_ci * accommodate the final 1, 2, or 3 remaining. 4310bf215546Sopenharmony_ci */ 4311bf215546Sopenharmony_ci int skip_components = output->dst_offset - next_offset[buffer]; 4312bf215546Sopenharmony_ci 4313bf215546Sopenharmony_ci while (skip_components > 0) { 4314bf215546Sopenharmony_ci so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { 4315bf215546Sopenharmony_ci .HoleFlag = 1, 4316bf215546Sopenharmony_ci .OutputBufferSlot = output->output_buffer, 4317bf215546Sopenharmony_ci .ComponentMask = (1 << MIN2(skip_components, 4)) - 1, 4318bf215546Sopenharmony_ci }; 4319bf215546Sopenharmony_ci skip_components -= 4; 4320bf215546Sopenharmony_ci } 4321bf215546Sopenharmony_ci 4322bf215546Sopenharmony_ci next_offset[buffer] = output->dst_offset + output->num_components; 4323bf215546Sopenharmony_ci 4324bf215546Sopenharmony_ci so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { 4325bf215546Sopenharmony_ci .OutputBufferSlot = output->output_buffer, 4326bf215546Sopenharmony_ci .RegisterIndex = vue_map->varying_to_slot[varying], 4327bf215546Sopenharmony_ci .ComponentMask = 4328bf215546Sopenharmony_ci ((1 << output->num_components) - 1) << output->start_component, 4329bf215546Sopenharmony_ci }; 4330bf215546Sopenharmony_ci 4331bf215546Sopenharmony_ci if (decls[stream_id] > max_decls) 4332bf215546Sopenharmony_ci max_decls = decls[stream_id]; 4333bf215546Sopenharmony_ci } 
4334bf215546Sopenharmony_ci 4335bf215546Sopenharmony_ci unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls); 4336bf215546Sopenharmony_ci uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords); 4337bf215546Sopenharmony_ci uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length); 4338bf215546Sopenharmony_ci 4339bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) { 4340bf215546Sopenharmony_ci int urb_entry_read_offset = 0; 4341bf215546Sopenharmony_ci int urb_entry_read_length = (vue_map->num_slots + 1) / 2 - 4342bf215546Sopenharmony_ci urb_entry_read_offset; 4343bf215546Sopenharmony_ci 4344bf215546Sopenharmony_ci /* We always read the whole vertex. This could be reduced at some 4345bf215546Sopenharmony_ci * point by reading less and offsetting the register index in the 4346bf215546Sopenharmony_ci * SO_DECLs. 4347bf215546Sopenharmony_ci */ 4348bf215546Sopenharmony_ci sol.Stream0VertexReadOffset = urb_entry_read_offset; 4349bf215546Sopenharmony_ci sol.Stream0VertexReadLength = urb_entry_read_length - 1; 4350bf215546Sopenharmony_ci sol.Stream1VertexReadOffset = urb_entry_read_offset; 4351bf215546Sopenharmony_ci sol.Stream1VertexReadLength = urb_entry_read_length - 1; 4352bf215546Sopenharmony_ci sol.Stream2VertexReadOffset = urb_entry_read_offset; 4353bf215546Sopenharmony_ci sol.Stream2VertexReadLength = urb_entry_read_length - 1; 4354bf215546Sopenharmony_ci sol.Stream3VertexReadOffset = urb_entry_read_offset; 4355bf215546Sopenharmony_ci sol.Stream3VertexReadLength = urb_entry_read_length - 1; 4356bf215546Sopenharmony_ci 4357bf215546Sopenharmony_ci // TODO: Double-check that stride == 0 means no buffer. Probably this 4358bf215546Sopenharmony_ci // needs to go elsewhere, where the buffer enable stuff is actually 4359bf215546Sopenharmony_ci // known. 
4360bf215546Sopenharmony_ci#if GFX_VER < 8 4361bf215546Sopenharmony_ci sol.SOBufferEnable0 = !!info->stride[0]; 4362bf215546Sopenharmony_ci sol.SOBufferEnable1 = !!info->stride[1]; 4363bf215546Sopenharmony_ci sol.SOBufferEnable2 = !!info->stride[2]; 4364bf215546Sopenharmony_ci sol.SOBufferEnable3 = !!info->stride[3]; 4365bf215546Sopenharmony_ci#else 4366bf215546Sopenharmony_ci /* Set buffer pitches; 0 means unbound. */ 4367bf215546Sopenharmony_ci sol.Buffer0SurfacePitch = 4 * info->stride[0]; 4368bf215546Sopenharmony_ci sol.Buffer1SurfacePitch = 4 * info->stride[1]; 4369bf215546Sopenharmony_ci sol.Buffer2SurfacePitch = 4 * info->stride[2]; 4370bf215546Sopenharmony_ci sol.Buffer3SurfacePitch = 4 * info->stride[3]; 4371bf215546Sopenharmony_ci#endif 4372bf215546Sopenharmony_ci } 4373bf215546Sopenharmony_ci 4374bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) { 4375bf215546Sopenharmony_ci list.DWordLength = 3 + 2 * max_decls - 2; 4376bf215546Sopenharmony_ci list.StreamtoBufferSelects0 = buffer_mask[0]; 4377bf215546Sopenharmony_ci list.StreamtoBufferSelects1 = buffer_mask[1]; 4378bf215546Sopenharmony_ci list.StreamtoBufferSelects2 = buffer_mask[2]; 4379bf215546Sopenharmony_ci list.StreamtoBufferSelects3 = buffer_mask[3]; 4380bf215546Sopenharmony_ci list.NumEntries0 = decls[0]; 4381bf215546Sopenharmony_ci list.NumEntries1 = decls[1]; 4382bf215546Sopenharmony_ci list.NumEntries2 = decls[2]; 4383bf215546Sopenharmony_ci list.NumEntries3 = decls[3]; 4384bf215546Sopenharmony_ci } 4385bf215546Sopenharmony_ci 4386bf215546Sopenharmony_ci for (int i = 0; i < max_decls; i++) { 4387bf215546Sopenharmony_ci crocus_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 3 + i * 2, entry) { 4388bf215546Sopenharmony_ci entry.Stream0Decl = so_decl[0][i]; 4389bf215546Sopenharmony_ci entry.Stream1Decl = so_decl[1][i]; 4390bf215546Sopenharmony_ci entry.Stream2Decl = so_decl[2][i]; 4391bf215546Sopenharmony_ci entry.Stream3Decl = so_decl[3][i]; 
4392bf215546Sopenharmony_ci } 4393bf215546Sopenharmony_ci } 4394bf215546Sopenharmony_ci 4395bf215546Sopenharmony_ci return map; 4396bf215546Sopenharmony_ci} 4397bf215546Sopenharmony_ci#endif 4398bf215546Sopenharmony_ci 4399bf215546Sopenharmony_ci#if GFX_VER == 6 4400bf215546Sopenharmony_cistatic void 4401bf215546Sopenharmony_cicrocus_emit_so_svbi(struct crocus_context *ice) 4402bf215546Sopenharmony_ci{ 4403bf215546Sopenharmony_ci struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; 4404bf215546Sopenharmony_ci 4405bf215546Sopenharmony_ci unsigned max_vertex = 0xffffffff; 4406bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 4407bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = 4408bf215546Sopenharmony_ci (void *) ice->state.so_target[i]; 4409bf215546Sopenharmony_ci if (tgt) 4410bf215546Sopenharmony_ci max_vertex = MIN2(max_vertex, tgt->base.buffer_size / tgt->stride); 4411bf215546Sopenharmony_ci } 4412bf215546Sopenharmony_ci 4413bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_GS_SVB_INDEX), svbi) { 4414bf215546Sopenharmony_ci svbi.IndexNumber = 0; 4415bf215546Sopenharmony_ci svbi.StreamedVertexBufferIndex = (uint32_t)ice->state.svbi; /* fix when resuming, based on target's prim count */ 4416bf215546Sopenharmony_ci svbi.MaximumIndex = max_vertex; 4417bf215546Sopenharmony_ci } 4418bf215546Sopenharmony_ci 4419bf215546Sopenharmony_ci /* initialize the rest of the SVBI's to reasonable values so that we don't 4420bf215546Sopenharmony_ci * run out of room writing the regular data. 
4421bf215546Sopenharmony_ci */ 4422bf215546Sopenharmony_ci for (int i = 1; i < 4; i++) { 4423bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_GS_SVB_INDEX), svbi) { 4424bf215546Sopenharmony_ci svbi.IndexNumber = i; 4425bf215546Sopenharmony_ci svbi.StreamedVertexBufferIndex = 0; 4426bf215546Sopenharmony_ci svbi.MaximumIndex = 0xffffffff; 4427bf215546Sopenharmony_ci } 4428bf215546Sopenharmony_ci } 4429bf215546Sopenharmony_ci} 4430bf215546Sopenharmony_ci 4431bf215546Sopenharmony_ci#endif 4432bf215546Sopenharmony_ci 4433bf215546Sopenharmony_ci 4434bf215546Sopenharmony_ci#if GFX_VER >= 6 4435bf215546Sopenharmony_cistatic bool 4436bf215546Sopenharmony_cicrocus_is_drawing_points(const struct crocus_context *ice) 4437bf215546Sopenharmony_ci{ 4438bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 4439bf215546Sopenharmony_ci 4440bf215546Sopenharmony_ci if (cso_rast->cso.fill_front == PIPE_POLYGON_MODE_POINT || 4441bf215546Sopenharmony_ci cso_rast->cso.fill_back == PIPE_POLYGON_MODE_POINT) 4442bf215546Sopenharmony_ci return true; 4443bf215546Sopenharmony_ci 4444bf215546Sopenharmony_ci if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) { 4445bf215546Sopenharmony_ci const struct brw_gs_prog_data *gs_prog_data = 4446bf215546Sopenharmony_ci (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data; 4447bf215546Sopenharmony_ci return gs_prog_data->output_topology == _3DPRIM_POINTLIST; 4448bf215546Sopenharmony_ci } else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) { 4449bf215546Sopenharmony_ci const struct brw_tes_prog_data *tes_data = 4450bf215546Sopenharmony_ci (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data; 4451bf215546Sopenharmony_ci return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT; 4452bf215546Sopenharmony_ci } else { 4453bf215546Sopenharmony_ci return ice->state.prim_mode == PIPE_PRIM_POINTS; 4454bf215546Sopenharmony_ci } 4455bf215546Sopenharmony_ci} 4456bf215546Sopenharmony_ci#endif 
4457bf215546Sopenharmony_ci 4458bf215546Sopenharmony_ci#if GFX_VER >= 6 4459bf215546Sopenharmony_cistatic void 4460bf215546Sopenharmony_ciget_attr_override( 4461bf215546Sopenharmony_ci struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr, 4462bf215546Sopenharmony_ci const struct brw_vue_map *vue_map, 4463bf215546Sopenharmony_ci int urb_entry_read_offset, int fs_attr, 4464bf215546Sopenharmony_ci bool two_side_color, uint32_t *max_source_attr) 4465bf215546Sopenharmony_ci{ 4466bf215546Sopenharmony_ci /* Find the VUE slot for this attribute. */ 4467bf215546Sopenharmony_ci int slot = vue_map->varying_to_slot[fs_attr]; 4468bf215546Sopenharmony_ci 4469bf215546Sopenharmony_ci /* Viewport and Layer are stored in the VUE header. We need to override 4470bf215546Sopenharmony_ci * them to zero if earlier stages didn't write them, as GL requires that 4471bf215546Sopenharmony_ci * they read back as zero when not explicitly set. 4472bf215546Sopenharmony_ci */ 4473bf215546Sopenharmony_ci if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) { 4474bf215546Sopenharmony_ci attr->ComponentOverrideX = true; 4475bf215546Sopenharmony_ci attr->ComponentOverrideW = true; 4476bf215546Sopenharmony_ci attr->ConstantSource = CONST_0000; 4477bf215546Sopenharmony_ci 4478bf215546Sopenharmony_ci if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) 4479bf215546Sopenharmony_ci attr->ComponentOverrideY = true; 4480bf215546Sopenharmony_ci if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) 4481bf215546Sopenharmony_ci attr->ComponentOverrideZ = true; 4482bf215546Sopenharmony_ci 4483bf215546Sopenharmony_ci return; 4484bf215546Sopenharmony_ci } 4485bf215546Sopenharmony_ci 4486bf215546Sopenharmony_ci /* If there was only a back color written but not front, use back 4487bf215546Sopenharmony_ci * as the color instead of undefined 4488bf215546Sopenharmony_ci */ 4489bf215546Sopenharmony_ci if (slot == -1 && fs_attr == VARYING_SLOT_COL0) 4490bf215546Sopenharmony_ci slot = 
vue_map->varying_to_slot[VARYING_SLOT_BFC0]; 4491bf215546Sopenharmony_ci if (slot == -1 && fs_attr == VARYING_SLOT_COL1) 4492bf215546Sopenharmony_ci slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; 4493bf215546Sopenharmony_ci 4494bf215546Sopenharmony_ci if (slot == -1) { 4495bf215546Sopenharmony_ci /* This attribute does not exist in the VUE--that means that the vertex 4496bf215546Sopenharmony_ci * shader did not write to it. This means that either: 4497bf215546Sopenharmony_ci * 4498bf215546Sopenharmony_ci * (a) This attribute is a texture coordinate, and it is going to be 4499bf215546Sopenharmony_ci * replaced with point coordinates (as a consequence of a call to 4500bf215546Sopenharmony_ci * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the 4501bf215546Sopenharmony_ci * hardware will ignore whatever attribute override we supply. 4502bf215546Sopenharmony_ci * 4503bf215546Sopenharmony_ci * (b) This attribute is read by the fragment shader but not written by 4504bf215546Sopenharmony_ci * the vertex shader, so its value is undefined. Therefore the 4505bf215546Sopenharmony_ci * attribute override we supply doesn't matter. 4506bf215546Sopenharmony_ci * 4507bf215546Sopenharmony_ci * (c) This attribute is gl_PrimitiveID, and it wasn't written by the 4508bf215546Sopenharmony_ci * previous shader stage. 4509bf215546Sopenharmony_ci * 4510bf215546Sopenharmony_ci * Note that we don't have to worry about the cases where the attribute 4511bf215546Sopenharmony_ci * is gl_PointCoord or is undergoing point sprite coordinate 4512bf215546Sopenharmony_ci * replacement, because in those cases, this function isn't called. 4513bf215546Sopenharmony_ci * 4514bf215546Sopenharmony_ci * In case (c), we need to program the attribute overrides so that the 4515bf215546Sopenharmony_ci * primitive ID will be stored in this slot. In every other case, the 4516bf215546Sopenharmony_ci * attribute override we supply doesn't matter. 
So just go ahead and 4517bf215546Sopenharmony_ci * program primitive ID in every case. 4518bf215546Sopenharmony_ci */ 4519bf215546Sopenharmony_ci attr->ComponentOverrideW = true; 4520bf215546Sopenharmony_ci attr->ComponentOverrideX = true; 4521bf215546Sopenharmony_ci attr->ComponentOverrideY = true; 4522bf215546Sopenharmony_ci attr->ComponentOverrideZ = true; 4523bf215546Sopenharmony_ci attr->ConstantSource = PRIM_ID; 4524bf215546Sopenharmony_ci return; 4525bf215546Sopenharmony_ci } 4526bf215546Sopenharmony_ci 4527bf215546Sopenharmony_ci /* Compute the location of the attribute relative to urb_entry_read_offset. 4528bf215546Sopenharmony_ci * Each increment of urb_entry_read_offset represents a 256-bit value, so 4529bf215546Sopenharmony_ci * it counts for two 128-bit VUE slots. 4530bf215546Sopenharmony_ci */ 4531bf215546Sopenharmony_ci int source_attr = slot - 2 * urb_entry_read_offset; 4532bf215546Sopenharmony_ci assert(source_attr >= 0 && source_attr < 32); 4533bf215546Sopenharmony_ci 4534bf215546Sopenharmony_ci /* If we are doing two-sided color, and the VUE slot following this one 4535bf215546Sopenharmony_ci * represents a back-facing color, then we need to instruct the SF unit to 4536bf215546Sopenharmony_ci * do back-facing swizzling. 4537bf215546Sopenharmony_ci */ 4538bf215546Sopenharmony_ci bool swizzling = two_side_color && 4539bf215546Sopenharmony_ci ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && 4540bf215546Sopenharmony_ci vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || 4541bf215546Sopenharmony_ci (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && 4542bf215546Sopenharmony_ci vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)); 4543bf215546Sopenharmony_ci 4544bf215546Sopenharmony_ci /* Update max_source_attr. If swizzling, the SF will read this slot + 1. 
*/ 4545bf215546Sopenharmony_ci if (*max_source_attr < source_attr + swizzling) 4546bf215546Sopenharmony_ci *max_source_attr = source_attr + swizzling; 4547bf215546Sopenharmony_ci 4548bf215546Sopenharmony_ci attr->SourceAttribute = source_attr; 4549bf215546Sopenharmony_ci if (swizzling) 4550bf215546Sopenharmony_ci attr->SwizzleSelect = INPUTATTR_FACING; 4551bf215546Sopenharmony_ci} 4552bf215546Sopenharmony_ci 4553bf215546Sopenharmony_cistatic void 4554bf215546Sopenharmony_cicalculate_attr_overrides( 4555bf215546Sopenharmony_ci const struct crocus_context *ice, 4556bf215546Sopenharmony_ci struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides, 4557bf215546Sopenharmony_ci uint32_t *point_sprite_enables, 4558bf215546Sopenharmony_ci uint32_t *urb_entry_read_length, 4559bf215546Sopenharmony_ci uint32_t *urb_entry_read_offset) 4560bf215546Sopenharmony_ci{ 4561bf215546Sopenharmony_ci const struct brw_wm_prog_data *wm_prog_data = (void *) 4562bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 4563bf215546Sopenharmony_ci const struct brw_vue_map *vue_map = ice->shaders.last_vue_map; 4564bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 4565bf215546Sopenharmony_ci uint32_t max_source_attr = 0; 4566bf215546Sopenharmony_ci const struct shader_info *fs_info = 4567bf215546Sopenharmony_ci crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT); 4568bf215546Sopenharmony_ci 4569bf215546Sopenharmony_ci int first_slot = 4570bf215546Sopenharmony_ci brw_compute_first_urb_slot_required(fs_info->inputs_read, vue_map); 4571bf215546Sopenharmony_ci 4572bf215546Sopenharmony_ci /* Each URB offset packs two varying slots */ 4573bf215546Sopenharmony_ci assert(first_slot % 2 == 0); 4574bf215546Sopenharmony_ci *urb_entry_read_offset = first_slot / 2; 4575bf215546Sopenharmony_ci *point_sprite_enables = 0; 4576bf215546Sopenharmony_ci 4577bf215546Sopenharmony_ci for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) { 
4578bf215546Sopenharmony_ci const int input_index = wm_prog_data->urb_setup[fs_attr]; 4579bf215546Sopenharmony_ci 4580bf215546Sopenharmony_ci if (input_index < 0) 4581bf215546Sopenharmony_ci continue; 4582bf215546Sopenharmony_ci 4583bf215546Sopenharmony_ci bool point_sprite = false; 4584bf215546Sopenharmony_ci if (crocus_is_drawing_points(ice)) { 4585bf215546Sopenharmony_ci if (fs_attr >= VARYING_SLOT_TEX0 && 4586bf215546Sopenharmony_ci fs_attr <= VARYING_SLOT_TEX7 && 4587bf215546Sopenharmony_ci cso_rast->cso.sprite_coord_enable & (1 << (fs_attr - VARYING_SLOT_TEX0))) 4588bf215546Sopenharmony_ci point_sprite = true; 4589bf215546Sopenharmony_ci 4590bf215546Sopenharmony_ci if (fs_attr == VARYING_SLOT_PNTC) 4591bf215546Sopenharmony_ci point_sprite = true; 4592bf215546Sopenharmony_ci 4593bf215546Sopenharmony_ci if (point_sprite) 4594bf215546Sopenharmony_ci *point_sprite_enables |= 1U << input_index; 4595bf215546Sopenharmony_ci } 4596bf215546Sopenharmony_ci 4597bf215546Sopenharmony_ci struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 }; 4598bf215546Sopenharmony_ci if (!point_sprite) { 4599bf215546Sopenharmony_ci get_attr_override(&attribute, vue_map, *urb_entry_read_offset, fs_attr, 4600bf215546Sopenharmony_ci cso_rast->cso.light_twoside, &max_source_attr); 4601bf215546Sopenharmony_ci } 4602bf215546Sopenharmony_ci 4603bf215546Sopenharmony_ci /* The hardware can only do the overrides on 16 overrides at a 4604bf215546Sopenharmony_ci * time, and the other up to 16 have to be lined up so that the 4605bf215546Sopenharmony_ci * input index = the output index. We'll need to do some 4606bf215546Sopenharmony_ci * tweaking to make sure that's the case. 
4607bf215546Sopenharmony_ci */ 4608bf215546Sopenharmony_ci if (input_index < 16) 4609bf215546Sopenharmony_ci attr_overrides[input_index] = attribute; 4610bf215546Sopenharmony_ci else 4611bf215546Sopenharmony_ci assert(attribute.SourceAttribute == input_index); 4612bf215546Sopenharmony_ci } 4613bf215546Sopenharmony_ci 4614bf215546Sopenharmony_ci /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for 4615bf215546Sopenharmony_ci * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": 4616bf215546Sopenharmony_ci * 4617bf215546Sopenharmony_ci * "This field should be set to the minimum length required to read the 4618bf215546Sopenharmony_ci * maximum source attribute. The maximum source attribute is indicated 4619bf215546Sopenharmony_ci * by the maximum value of the enabled Attribute # Source Attribute if 4620bf215546Sopenharmony_ci * Attribute Swizzle Enable is set, Number of Output Attributes-1 if 4621bf215546Sopenharmony_ci * enable is not set. 4622bf215546Sopenharmony_ci * read_length = ceiling((max_source_attr + 1) / 2) 4623bf215546Sopenharmony_ci * 4624bf215546Sopenharmony_ci * [errata] Corruption/Hang possible if length programmed larger than 4625bf215546Sopenharmony_ci * recommended" 4626bf215546Sopenharmony_ci * 4627bf215546Sopenharmony_ci * Similar text exists for Ivy Bridge. 
4628bf215546Sopenharmony_ci */ 4629bf215546Sopenharmony_ci *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2); 4630bf215546Sopenharmony_ci} 4631bf215546Sopenharmony_ci#endif 4632bf215546Sopenharmony_ci 4633bf215546Sopenharmony_ci#if GFX_VER >= 7 4634bf215546Sopenharmony_cistatic void 4635bf215546Sopenharmony_cicrocus_emit_sbe(struct crocus_batch *batch, const struct crocus_context *ice) 4636bf215546Sopenharmony_ci{ 4637bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 4638bf215546Sopenharmony_ci const struct brw_wm_prog_data *wm_prog_data = (void *) 4639bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; 4640bf215546Sopenharmony_ci#if GFX_VER >= 8 4641bf215546Sopenharmony_ci struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } }; 4642bf215546Sopenharmony_ci#else 4643bf215546Sopenharmony_ci#define attr_overrides sbe.Attribute 4644bf215546Sopenharmony_ci#endif 4645bf215546Sopenharmony_ci 4646bf215546Sopenharmony_ci uint32_t urb_entry_read_length; 4647bf215546Sopenharmony_ci uint32_t urb_entry_read_offset; 4648bf215546Sopenharmony_ci uint32_t point_sprite_enables; 4649bf215546Sopenharmony_ci 4650bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { 4651bf215546Sopenharmony_ci sbe.AttributeSwizzleEnable = true; 4652bf215546Sopenharmony_ci sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 4653bf215546Sopenharmony_ci sbe.PointSpriteTextureCoordinateOrigin = cso_rast->cso.sprite_coord_mode; 4654bf215546Sopenharmony_ci 4655bf215546Sopenharmony_ci calculate_attr_overrides(ice, 4656bf215546Sopenharmony_ci attr_overrides, 4657bf215546Sopenharmony_ci &point_sprite_enables, 4658bf215546Sopenharmony_ci &urb_entry_read_length, 4659bf215546Sopenharmony_ci &urb_entry_read_offset); 4660bf215546Sopenharmony_ci sbe.VertexURBEntryReadOffset = urb_entry_read_offset; 4661bf215546Sopenharmony_ci sbe.VertexURBEntryReadLength = urb_entry_read_length; 
4662bf215546Sopenharmony_ci sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; 4663bf215546Sopenharmony_ci sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables; 4664bf215546Sopenharmony_ci#if GFX_VER >= 8 4665bf215546Sopenharmony_ci sbe.ForceVertexURBEntryReadLength = true; 4666bf215546Sopenharmony_ci sbe.ForceVertexURBEntryReadOffset = true; 4667bf215546Sopenharmony_ci#endif 4668bf215546Sopenharmony_ci } 4669bf215546Sopenharmony_ci#if GFX_VER >= 8 4670bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) { 4671bf215546Sopenharmony_ci for (int i = 0; i < 16; i++) 4672bf215546Sopenharmony_ci sbes.Attribute[i] = attr_overrides[i]; 4673bf215546Sopenharmony_ci } 4674bf215546Sopenharmony_ci#endif 4675bf215546Sopenharmony_ci} 4676bf215546Sopenharmony_ci#endif 4677bf215546Sopenharmony_ci 4678bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */ 4679bf215546Sopenharmony_ci 4680bf215546Sopenharmony_ci/** 4681bf215546Sopenharmony_ci * Populate VS program key fields based on the current state. 
4682bf215546Sopenharmony_ci */ 4683bf215546Sopenharmony_cistatic void 4684bf215546Sopenharmony_cicrocus_populate_vs_key(const struct crocus_context *ice, 4685bf215546Sopenharmony_ci const struct shader_info *info, 4686bf215546Sopenharmony_ci gl_shader_stage last_stage, 4687bf215546Sopenharmony_ci struct brw_vs_prog_key *key) 4688bf215546Sopenharmony_ci{ 4689bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 4690bf215546Sopenharmony_ci 4691bf215546Sopenharmony_ci if (info->clip_distance_array_size == 0 && 4692bf215546Sopenharmony_ci (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) && 4693bf215546Sopenharmony_ci last_stage == MESA_SHADER_VERTEX) 4694bf215546Sopenharmony_ci key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; 4695bf215546Sopenharmony_ci 4696bf215546Sopenharmony_ci if (last_stage == MESA_SHADER_VERTEX && 4697bf215546Sopenharmony_ci info->outputs_written & (VARYING_BIT_PSIZ)) 4698bf215546Sopenharmony_ci key->clamp_pointsize = 1; 4699bf215546Sopenharmony_ci 4700bf215546Sopenharmony_ci#if GFX_VER <= 5 4701bf215546Sopenharmony_ci key->copy_edgeflag = (cso_rast->cso.fill_back != PIPE_POLYGON_MODE_FILL || 4702bf215546Sopenharmony_ci cso_rast->cso.fill_front != PIPE_POLYGON_MODE_FILL); 4703bf215546Sopenharmony_ci key->point_coord_replace = cso_rast->cso.sprite_coord_enable & 0xff; 4704bf215546Sopenharmony_ci#endif 4705bf215546Sopenharmony_ci 4706bf215546Sopenharmony_ci key->clamp_vertex_color = cso_rast->cso.clamp_vertex_color; 4707bf215546Sopenharmony_ci 4708bf215546Sopenharmony_ci#if GFX_VERx10 < 75 4709bf215546Sopenharmony_ci uint64_t inputs_read = info->inputs_read; 4710bf215546Sopenharmony_ci int ve_idx = 0; 4711bf215546Sopenharmony_ci while (inputs_read) { 4712bf215546Sopenharmony_ci int i = u_bit_scan64(&inputs_read); 4713bf215546Sopenharmony_ci key->gl_attrib_wa_flags[i] = ice->state.cso_vertex_elements->wa_flags[ve_idx]; 4714bf215546Sopenharmony_ci ve_idx++; 
4715bf215546Sopenharmony_ci } 4716bf215546Sopenharmony_ci#endif 4717bf215546Sopenharmony_ci} 4718bf215546Sopenharmony_ci 4719bf215546Sopenharmony_ci/** 4720bf215546Sopenharmony_ci * Populate TCS program key fields based on the current state. 4721bf215546Sopenharmony_ci */ 4722bf215546Sopenharmony_cistatic void 4723bf215546Sopenharmony_cicrocus_populate_tcs_key(const struct crocus_context *ice, 4724bf215546Sopenharmony_ci struct brw_tcs_prog_key *key) 4725bf215546Sopenharmony_ci{ 4726bf215546Sopenharmony_ci} 4727bf215546Sopenharmony_ci 4728bf215546Sopenharmony_ci/** 4729bf215546Sopenharmony_ci * Populate TES program key fields based on the current state. 4730bf215546Sopenharmony_ci */ 4731bf215546Sopenharmony_cistatic void 4732bf215546Sopenharmony_cicrocus_populate_tes_key(const struct crocus_context *ice, 4733bf215546Sopenharmony_ci const struct shader_info *info, 4734bf215546Sopenharmony_ci gl_shader_stage last_stage, 4735bf215546Sopenharmony_ci struct brw_tes_prog_key *key) 4736bf215546Sopenharmony_ci{ 4737bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 4738bf215546Sopenharmony_ci 4739bf215546Sopenharmony_ci if (info->clip_distance_array_size == 0 && 4740bf215546Sopenharmony_ci (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) && 4741bf215546Sopenharmony_ci last_stage == MESA_SHADER_TESS_EVAL) 4742bf215546Sopenharmony_ci key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; 4743bf215546Sopenharmony_ci 4744bf215546Sopenharmony_ci if (last_stage == MESA_SHADER_TESS_EVAL && 4745bf215546Sopenharmony_ci info->outputs_written & (VARYING_BIT_PSIZ)) 4746bf215546Sopenharmony_ci key->clamp_pointsize = 1; 4747bf215546Sopenharmony_ci} 4748bf215546Sopenharmony_ci 4749bf215546Sopenharmony_ci/** 4750bf215546Sopenharmony_ci * Populate GS program key fields based on the current state. 
4751bf215546Sopenharmony_ci */ 4752bf215546Sopenharmony_cistatic void 4753bf215546Sopenharmony_cicrocus_populate_gs_key(const struct crocus_context *ice, 4754bf215546Sopenharmony_ci const struct shader_info *info, 4755bf215546Sopenharmony_ci gl_shader_stage last_stage, 4756bf215546Sopenharmony_ci struct brw_gs_prog_key *key) 4757bf215546Sopenharmony_ci{ 4758bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 4759bf215546Sopenharmony_ci 4760bf215546Sopenharmony_ci if (info->clip_distance_array_size == 0 && 4761bf215546Sopenharmony_ci (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) && 4762bf215546Sopenharmony_ci last_stage == MESA_SHADER_GEOMETRY) 4763bf215546Sopenharmony_ci key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts; 4764bf215546Sopenharmony_ci 4765bf215546Sopenharmony_ci if (last_stage == MESA_SHADER_GEOMETRY && 4766bf215546Sopenharmony_ci info->outputs_written & (VARYING_BIT_PSIZ)) 4767bf215546Sopenharmony_ci key->clamp_pointsize = 1; 4768bf215546Sopenharmony_ci} 4769bf215546Sopenharmony_ci 4770bf215546Sopenharmony_ci/** 4771bf215546Sopenharmony_ci * Populate FS program key fields based on the current state. 
4772bf215546Sopenharmony_ci */ 4773bf215546Sopenharmony_cistatic void 4774bf215546Sopenharmony_cicrocus_populate_fs_key(const struct crocus_context *ice, 4775bf215546Sopenharmony_ci const struct shader_info *info, 4776bf215546Sopenharmony_ci struct brw_wm_prog_key *key) 4777bf215546Sopenharmony_ci{ 4778bf215546Sopenharmony_ci struct crocus_screen *screen = (void *) ice->ctx.screen; 4779bf215546Sopenharmony_ci const struct pipe_framebuffer_state *fb = &ice->state.framebuffer; 4780bf215546Sopenharmony_ci const struct crocus_depth_stencil_alpha_state *zsa = ice->state.cso_zsa; 4781bf215546Sopenharmony_ci const struct crocus_rasterizer_state *rast = ice->state.cso_rast; 4782bf215546Sopenharmony_ci const struct crocus_blend_state *blend = ice->state.cso_blend; 4783bf215546Sopenharmony_ci 4784bf215546Sopenharmony_ci#if GFX_VER < 6 4785bf215546Sopenharmony_ci uint32_t lookup = 0; 4786bf215546Sopenharmony_ci 4787bf215546Sopenharmony_ci if (info->fs.uses_discard || zsa->cso.alpha_enabled) 4788bf215546Sopenharmony_ci lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT; 4789bf215546Sopenharmony_ci 4790bf215546Sopenharmony_ci if (info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) 4791bf215546Sopenharmony_ci lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT; 4792bf215546Sopenharmony_ci 4793bf215546Sopenharmony_ci if (fb->zsbuf && zsa->cso.depth_enabled) { 4794bf215546Sopenharmony_ci lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT; 4795bf215546Sopenharmony_ci 4796bf215546Sopenharmony_ci if (zsa->cso.depth_writemask) 4797bf215546Sopenharmony_ci lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT; 4798bf215546Sopenharmony_ci 4799bf215546Sopenharmony_ci } 4800bf215546Sopenharmony_ci if (zsa->cso.stencil[0].enabled || zsa->cso.stencil[1].enabled) { 4801bf215546Sopenharmony_ci lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT; 4802bf215546Sopenharmony_ci if (zsa->cso.stencil[0].writemask || zsa->cso.stencil[1].writemask) 4803bf215546Sopenharmony_ci lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT; 
4804bf215546Sopenharmony_ci } 4805bf215546Sopenharmony_ci key->iz_lookup = lookup; 4806bf215546Sopenharmony_ci key->stats_wm = ice->state.stats_wm; 4807bf215546Sopenharmony_ci#endif 4808bf215546Sopenharmony_ci 4809bf215546Sopenharmony_ci uint32_t line_aa = BRW_WM_AA_NEVER; 4810bf215546Sopenharmony_ci if (rast->cso.line_smooth) { 4811bf215546Sopenharmony_ci int reduced_prim = ice->state.reduced_prim_mode; 4812bf215546Sopenharmony_ci if (reduced_prim == PIPE_PRIM_LINES) 4813bf215546Sopenharmony_ci line_aa = BRW_WM_AA_ALWAYS; 4814bf215546Sopenharmony_ci else if (reduced_prim == PIPE_PRIM_TRIANGLES) { 4815bf215546Sopenharmony_ci if (rast->cso.fill_front == PIPE_POLYGON_MODE_LINE) { 4816bf215546Sopenharmony_ci line_aa = BRW_WM_AA_SOMETIMES; 4817bf215546Sopenharmony_ci 4818bf215546Sopenharmony_ci if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE || 4819bf215546Sopenharmony_ci rast->cso.cull_face == PIPE_FACE_BACK) 4820bf215546Sopenharmony_ci line_aa = BRW_WM_AA_ALWAYS; 4821bf215546Sopenharmony_ci } else if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE) { 4822bf215546Sopenharmony_ci line_aa = BRW_WM_AA_SOMETIMES; 4823bf215546Sopenharmony_ci 4824bf215546Sopenharmony_ci if (rast->cso.cull_face == PIPE_FACE_FRONT) 4825bf215546Sopenharmony_ci line_aa = BRW_WM_AA_ALWAYS; 4826bf215546Sopenharmony_ci } 4827bf215546Sopenharmony_ci } 4828bf215546Sopenharmony_ci } 4829bf215546Sopenharmony_ci key->line_aa = line_aa; 4830bf215546Sopenharmony_ci 4831bf215546Sopenharmony_ci key->nr_color_regions = fb->nr_cbufs; 4832bf215546Sopenharmony_ci 4833bf215546Sopenharmony_ci key->clamp_fragment_color = rast->cso.clamp_fragment_color; 4834bf215546Sopenharmony_ci 4835bf215546Sopenharmony_ci key->alpha_to_coverage = blend->cso.alpha_to_coverage; 4836bf215546Sopenharmony_ci 4837bf215546Sopenharmony_ci key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->cso.alpha_enabled; 4838bf215546Sopenharmony_ci 4839bf215546Sopenharmony_ci key->flat_shade = rast->cso.flatshade && 
4840bf215546Sopenharmony_ci (info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)); 4841bf215546Sopenharmony_ci 4842bf215546Sopenharmony_ci key->persample_interp = rast->cso.force_persample_interp; 4843bf215546Sopenharmony_ci key->multisample_fbo = rast->cso.multisample && fb->samples > 1; 4844bf215546Sopenharmony_ci 4845bf215546Sopenharmony_ci key->ignore_sample_mask_out = !key->multisample_fbo; 4846bf215546Sopenharmony_ci key->coherent_fb_fetch = false; // TODO: needed? 4847bf215546Sopenharmony_ci 4848bf215546Sopenharmony_ci key->force_dual_color_blend = 4849bf215546Sopenharmony_ci screen->driconf.dual_color_blend_by_location && 4850bf215546Sopenharmony_ci (blend->blend_enables & 1) && blend->dual_color_blending; 4851bf215546Sopenharmony_ci 4852bf215546Sopenharmony_ci#if GFX_VER <= 5 4853bf215546Sopenharmony_ci if (fb->nr_cbufs > 1 && zsa->cso.alpha_enabled) { 4854bf215546Sopenharmony_ci key->emit_alpha_test = true; 4855bf215546Sopenharmony_ci key->alpha_test_func = zsa->cso.alpha_func; 4856bf215546Sopenharmony_ci key->alpha_test_ref = zsa->cso.alpha_ref_value; 4857bf215546Sopenharmony_ci } 4858bf215546Sopenharmony_ci#endif 4859bf215546Sopenharmony_ci} 4860bf215546Sopenharmony_ci 4861bf215546Sopenharmony_cistatic void 4862bf215546Sopenharmony_cicrocus_populate_cs_key(const struct crocus_context *ice, 4863bf215546Sopenharmony_ci struct brw_cs_prog_key *key) 4864bf215546Sopenharmony_ci{ 4865bf215546Sopenharmony_ci} 4866bf215546Sopenharmony_ci 4867bf215546Sopenharmony_ci#if GFX_VER == 4 4868bf215546Sopenharmony_ci#define KSP(ice, shader) ro_bo((ice)->shaders.cache_bo, (shader)->offset); 4869bf215546Sopenharmony_ci#elif GFX_VER >= 5 4870bf215546Sopenharmony_cistatic uint64_t 4871bf215546Sopenharmony_ciKSP(const struct crocus_context *ice, const struct crocus_compiled_shader *shader) 4872bf215546Sopenharmony_ci{ 4873bf215546Sopenharmony_ci return shader->offset; 4874bf215546Sopenharmony_ci} 4875bf215546Sopenharmony_ci#endif 4876bf215546Sopenharmony_ci 
4877bf215546Sopenharmony_ci/* Gen11 workaround table #2056 WABTPPrefetchDisable suggests to disable 4878bf215546Sopenharmony_ci * prefetching of binding tables in A0 and B0 steppings. XXX: Revisit 4879bf215546Sopenharmony_ci * this WA on C0 stepping. 4880bf215546Sopenharmony_ci * 4881bf215546Sopenharmony_ci * TODO: Fill out SamplerCount for prefetching? 4882bf215546Sopenharmony_ci */ 4883bf215546Sopenharmony_ci 4884bf215546Sopenharmony_ci#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage) \ 4885bf215546Sopenharmony_ci pkt.KernelStartPointer = KSP(ice, shader); \ 4886bf215546Sopenharmony_ci pkt.BindingTableEntryCount = shader->bt.size_bytes / 4; \ 4887bf215546Sopenharmony_ci pkt.FloatingPointMode = prog_data->use_alt_mode; \ 4888bf215546Sopenharmony_ci \ 4889bf215546Sopenharmony_ci pkt.DispatchGRFStartRegisterForURBData = \ 4890bf215546Sopenharmony_ci prog_data->dispatch_grf_start_reg; \ 4891bf215546Sopenharmony_ci pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ 4892bf215546Sopenharmony_ci pkt.prefix##URBEntryReadOffset = 0; \ 4893bf215546Sopenharmony_ci \ 4894bf215546Sopenharmony_ci pkt.StatisticsEnable = true; \ 4895bf215546Sopenharmony_ci pkt.Enable = true; \ 4896bf215546Sopenharmony_ci \ 4897bf215546Sopenharmony_ci if (prog_data->total_scratch) { \ 4898bf215546Sopenharmony_ci struct crocus_bo *bo = \ 4899bf215546Sopenharmony_ci crocus_get_scratch_space(ice, prog_data->total_scratch, stage); \ 4900bf215546Sopenharmony_ci pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \ 4901bf215546Sopenharmony_ci pkt.ScratchSpaceBasePointer = rw_bo(bo, 0); \ 4902bf215546Sopenharmony_ci } 4903bf215546Sopenharmony_ci 4904bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */ 4905bf215546Sopenharmony_ci#if GFX_VER >= 6 4906bf215546Sopenharmony_cistatic const uint32_t push_constant_opcodes[] = { 4907bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = 21, 4908bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = 
25, /* HS */ 4909bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = 26, /* DS */ 4910bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = 22, 4911bf215546Sopenharmony_ci [MESA_SHADER_FRAGMENT] = 23, 4912bf215546Sopenharmony_ci [MESA_SHADER_COMPUTE] = 0, 4913bf215546Sopenharmony_ci}; 4914bf215546Sopenharmony_ci#endif 4915bf215546Sopenharmony_ci 4916bf215546Sopenharmony_cistatic void 4917bf215546Sopenharmony_ciemit_sized_null_surface(struct crocus_batch *batch, 4918bf215546Sopenharmony_ci unsigned width, unsigned height, 4919bf215546Sopenharmony_ci unsigned layers, unsigned levels, 4920bf215546Sopenharmony_ci unsigned minimum_array_element, 4921bf215546Sopenharmony_ci uint32_t *out_offset) 4922bf215546Sopenharmony_ci{ 4923bf215546Sopenharmony_ci struct isl_device *isl_dev = &batch->screen->isl_dev; 4924bf215546Sopenharmony_ci uint32_t *surf = stream_state(batch, isl_dev->ss.size, 4925bf215546Sopenharmony_ci isl_dev->ss.align, 4926bf215546Sopenharmony_ci out_offset); 4927bf215546Sopenharmony_ci //TODO gen 6 multisample crash 4928bf215546Sopenharmony_ci isl_null_fill_state(isl_dev, surf, 4929bf215546Sopenharmony_ci .size = isl_extent3d(width, height, layers), 4930bf215546Sopenharmony_ci .levels = levels, 4931bf215546Sopenharmony_ci .minimum_array_element = minimum_array_element); 4932bf215546Sopenharmony_ci} 4933bf215546Sopenharmony_cistatic void 4934bf215546Sopenharmony_ciemit_null_surface(struct crocus_batch *batch, 4935bf215546Sopenharmony_ci uint32_t *out_offset) 4936bf215546Sopenharmony_ci{ 4937bf215546Sopenharmony_ci emit_sized_null_surface(batch, 1, 1, 1, 0, 0, out_offset); 4938bf215546Sopenharmony_ci} 4939bf215546Sopenharmony_ci 4940bf215546Sopenharmony_cistatic void 4941bf215546Sopenharmony_ciemit_null_fb_surface(struct crocus_batch *batch, 4942bf215546Sopenharmony_ci struct crocus_context *ice, 4943bf215546Sopenharmony_ci uint32_t *out_offset) 4944bf215546Sopenharmony_ci{ 4945bf215546Sopenharmony_ci uint32_t width, height, layers, level, layer; 
4946bf215546Sopenharmony_ci /* If set_framebuffer_state() was never called, fall back to 1x1x1 */ 4947bf215546Sopenharmony_ci if (ice->state.framebuffer.width == 0 && ice->state.framebuffer.height == 0) { 4948bf215546Sopenharmony_ci emit_null_surface(batch, out_offset); 4949bf215546Sopenharmony_ci return; 4950bf215546Sopenharmony_ci } 4951bf215546Sopenharmony_ci 4952bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso = &ice->state.framebuffer; 4953bf215546Sopenharmony_ci width = MAX2(cso->width, 1); 4954bf215546Sopenharmony_ci height = MAX2(cso->height, 1); 4955bf215546Sopenharmony_ci layers = cso->layers ? cso->layers : 1; 4956bf215546Sopenharmony_ci level = 0; 4957bf215546Sopenharmony_ci layer = 0; 4958bf215546Sopenharmony_ci 4959bf215546Sopenharmony_ci if (cso->nr_cbufs == 0 && cso->zsbuf) { 4960bf215546Sopenharmony_ci width = cso->zsbuf->width; 4961bf215546Sopenharmony_ci height = cso->zsbuf->height; 4962bf215546Sopenharmony_ci level = cso->zsbuf->u.tex.level; 4963bf215546Sopenharmony_ci layer = cso->zsbuf->u.tex.first_layer; 4964bf215546Sopenharmony_ci } 4965bf215546Sopenharmony_ci emit_sized_null_surface(batch, width, height, 4966bf215546Sopenharmony_ci layers, level, layer, 4967bf215546Sopenharmony_ci out_offset); 4968bf215546Sopenharmony_ci} 4969bf215546Sopenharmony_ci 4970bf215546Sopenharmony_cistatic void 4971bf215546Sopenharmony_ciemit_surface_state(struct crocus_batch *batch, 4972bf215546Sopenharmony_ci struct crocus_resource *res, 4973bf215546Sopenharmony_ci const struct isl_surf *in_surf, 4974bf215546Sopenharmony_ci bool adjust_surf, 4975bf215546Sopenharmony_ci struct isl_view *in_view, 4976bf215546Sopenharmony_ci bool writeable, 4977bf215546Sopenharmony_ci enum isl_aux_usage aux_usage, 4978bf215546Sopenharmony_ci bool blend_enable, 4979bf215546Sopenharmony_ci uint32_t write_disables, 4980bf215546Sopenharmony_ci uint32_t *surf_state, 4981bf215546Sopenharmony_ci uint32_t addr_offset) 4982bf215546Sopenharmony_ci{ 4983bf215546Sopenharmony_ci 
struct isl_device *isl_dev = &batch->screen->isl_dev; 4984bf215546Sopenharmony_ci uint32_t reloc = RELOC_32BIT; 4985bf215546Sopenharmony_ci uint64_t offset_B = res->offset; 4986bf215546Sopenharmony_ci uint32_t tile_x_sa = 0, tile_y_sa = 0; 4987bf215546Sopenharmony_ci 4988bf215546Sopenharmony_ci if (writeable) 4989bf215546Sopenharmony_ci reloc |= RELOC_WRITE; 4990bf215546Sopenharmony_ci 4991bf215546Sopenharmony_ci struct isl_surf surf = *in_surf; 4992bf215546Sopenharmony_ci struct isl_view view = *in_view; 4993bf215546Sopenharmony_ci if (adjust_surf) { 4994bf215546Sopenharmony_ci if (res->base.b.target == PIPE_TEXTURE_3D && view.array_len == 1) { 4995bf215546Sopenharmony_ci isl_surf_get_image_surf(isl_dev, in_surf, 4996bf215546Sopenharmony_ci view.base_level, 0, 4997bf215546Sopenharmony_ci view.base_array_layer, 4998bf215546Sopenharmony_ci &surf, &offset_B, 4999bf215546Sopenharmony_ci &tile_x_sa, &tile_y_sa); 5000bf215546Sopenharmony_ci view.base_array_layer = 0; 5001bf215546Sopenharmony_ci view.base_level = 0; 5002bf215546Sopenharmony_ci } else if (res->base.b.target == PIPE_TEXTURE_CUBE && GFX_VER == 4) { 5003bf215546Sopenharmony_ci isl_surf_get_image_surf(isl_dev, in_surf, 5004bf215546Sopenharmony_ci view.base_level, view.base_array_layer, 5005bf215546Sopenharmony_ci 0, 5006bf215546Sopenharmony_ci &surf, &offset_B, 5007bf215546Sopenharmony_ci &tile_x_sa, &tile_y_sa); 5008bf215546Sopenharmony_ci view.base_array_layer = 0; 5009bf215546Sopenharmony_ci view.base_level = 0; 5010bf215546Sopenharmony_ci } else if (res->base.b.target == PIPE_TEXTURE_1D_ARRAY) 5011bf215546Sopenharmony_ci surf.dim = ISL_SURF_DIM_2D; 5012bf215546Sopenharmony_ci } 5013bf215546Sopenharmony_ci 5014bf215546Sopenharmony_ci union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; 5015bf215546Sopenharmony_ci struct crocus_bo *aux_bo = NULL; 5016bf215546Sopenharmony_ci uint32_t aux_offset = 0; 5017bf215546Sopenharmony_ci struct isl_surf *aux_surf = NULL; 5018bf215546Sopenharmony_ci if 
(aux_usage != ISL_AUX_USAGE_NONE) { 5019bf215546Sopenharmony_ci aux_surf = &res->aux.surf; 5020bf215546Sopenharmony_ci aux_offset = res->aux.offset; 5021bf215546Sopenharmony_ci aux_bo = res->aux.bo; 5022bf215546Sopenharmony_ci 5023bf215546Sopenharmony_ci clear_color = crocus_resource_get_clear_color(res); 5024bf215546Sopenharmony_ci } 5025bf215546Sopenharmony_ci 5026bf215546Sopenharmony_ci isl_surf_fill_state(isl_dev, surf_state, 5027bf215546Sopenharmony_ci .surf = &surf, 5028bf215546Sopenharmony_ci .view = &view, 5029bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, 5030bf215546Sopenharmony_ci addr_offset + isl_dev->ss.addr_offset, 5031bf215546Sopenharmony_ci res->bo, offset_B, reloc), 5032bf215546Sopenharmony_ci .aux_surf = aux_surf, 5033bf215546Sopenharmony_ci .aux_usage = aux_usage, 5034bf215546Sopenharmony_ci .aux_address = aux_offset, 5035bf215546Sopenharmony_ci .mocs = crocus_mocs(res->bo, isl_dev), 5036bf215546Sopenharmony_ci .clear_color = clear_color, 5037bf215546Sopenharmony_ci .use_clear_address = false, 5038bf215546Sopenharmony_ci .clear_address = 0, 5039bf215546Sopenharmony_ci .x_offset_sa = tile_x_sa, 5040bf215546Sopenharmony_ci .y_offset_sa = tile_y_sa, 5041bf215546Sopenharmony_ci#if GFX_VER <= 5 5042bf215546Sopenharmony_ci .blend_enable = blend_enable, 5043bf215546Sopenharmony_ci .write_disables = write_disables, 5044bf215546Sopenharmony_ci#endif 5045bf215546Sopenharmony_ci ); 5046bf215546Sopenharmony_ci 5047bf215546Sopenharmony_ci if (aux_surf) { 5048bf215546Sopenharmony_ci /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the 5049bf215546Sopenharmony_ci * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits 5050bf215546Sopenharmony_ci * contain other control information. Since buffer addresses are always 5051bf215546Sopenharmony_ci * on 4k boundaries (and thus have their lower 12 bits zero), we can use 5052bf215546Sopenharmony_ci * an ordinary reloc to do the necessary address translation. 
5053bf215546Sopenharmony_ci * 5054bf215546Sopenharmony_ci * FIXME: move to the point of assignment. 5055bf215546Sopenharmony_ci */ 5056bf215546Sopenharmony_ci if (GFX_VER == 8) { 5057bf215546Sopenharmony_ci uint64_t *aux_addr = (uint64_t *)(surf_state + (isl_dev->ss.aux_addr_offset / 4)); 5058bf215546Sopenharmony_ci *aux_addr = crocus_state_reloc(batch, 5059bf215546Sopenharmony_ci addr_offset + isl_dev->ss.aux_addr_offset, 5060bf215546Sopenharmony_ci aux_bo, *aux_addr, 5061bf215546Sopenharmony_ci reloc); 5062bf215546Sopenharmony_ci } else { 5063bf215546Sopenharmony_ci uint32_t *aux_addr = surf_state + (isl_dev->ss.aux_addr_offset / 4); 5064bf215546Sopenharmony_ci *aux_addr = crocus_state_reloc(batch, 5065bf215546Sopenharmony_ci addr_offset + isl_dev->ss.aux_addr_offset, 5066bf215546Sopenharmony_ci aux_bo, *aux_addr, 5067bf215546Sopenharmony_ci reloc); 5068bf215546Sopenharmony_ci } 5069bf215546Sopenharmony_ci } 5070bf215546Sopenharmony_ci 5071bf215546Sopenharmony_ci} 5072bf215546Sopenharmony_ci 5073bf215546Sopenharmony_cistatic uint32_t 5074bf215546Sopenharmony_ciemit_surface(struct crocus_batch *batch, 5075bf215546Sopenharmony_ci struct crocus_surface *surf, 5076bf215546Sopenharmony_ci enum isl_aux_usage aux_usage, 5077bf215546Sopenharmony_ci bool blend_enable, 5078bf215546Sopenharmony_ci uint32_t write_disables) 5079bf215546Sopenharmony_ci{ 5080bf215546Sopenharmony_ci struct isl_device *isl_dev = &batch->screen->isl_dev; 5081bf215546Sopenharmony_ci struct crocus_resource *res = (struct crocus_resource *)surf->base.texture; 5082bf215546Sopenharmony_ci struct isl_view *view = &surf->view; 5083bf215546Sopenharmony_ci uint32_t offset = 0; 5084bf215546Sopenharmony_ci enum pipe_texture_target target = res->base.b.target; 5085bf215546Sopenharmony_ci bool adjust_surf = false; 5086bf215546Sopenharmony_ci 5087bf215546Sopenharmony_ci if (GFX_VER == 4 && target == PIPE_TEXTURE_CUBE) 5088bf215546Sopenharmony_ci adjust_surf = true; 5089bf215546Sopenharmony_ci 
5090bf215546Sopenharmony_ci if (surf->align_res) 5091bf215546Sopenharmony_ci res = (struct crocus_resource *)surf->align_res; 5092bf215546Sopenharmony_ci 5093bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset); 5094bf215546Sopenharmony_ci 5095bf215546Sopenharmony_ci emit_surface_state(batch, res, &surf->surf, adjust_surf, view, true, 5096bf215546Sopenharmony_ci aux_usage, blend_enable, 5097bf215546Sopenharmony_ci write_disables, 5098bf215546Sopenharmony_ci surf_state, offset); 5099bf215546Sopenharmony_ci return offset; 5100bf215546Sopenharmony_ci} 5101bf215546Sopenharmony_ci 5102bf215546Sopenharmony_cistatic uint32_t 5103bf215546Sopenharmony_ciemit_rt_surface(struct crocus_batch *batch, 5104bf215546Sopenharmony_ci struct crocus_surface *surf, 5105bf215546Sopenharmony_ci enum isl_aux_usage aux_usage) 5106bf215546Sopenharmony_ci{ 5107bf215546Sopenharmony_ci struct isl_device *isl_dev = &batch->screen->isl_dev; 5108bf215546Sopenharmony_ci struct crocus_resource *res = (struct crocus_resource *)surf->base.texture; 5109bf215546Sopenharmony_ci struct isl_view *view = &surf->read_view; 5110bf215546Sopenharmony_ci uint32_t offset = 0; 5111bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset); 5112bf215546Sopenharmony_ci 5113bf215546Sopenharmony_ci emit_surface_state(batch, res, &surf->surf, true, view, false, 5114bf215546Sopenharmony_ci aux_usage, 0, false, 5115bf215546Sopenharmony_ci surf_state, offset); 5116bf215546Sopenharmony_ci return offset; 5117bf215546Sopenharmony_ci} 5118bf215546Sopenharmony_ci 5119bf215546Sopenharmony_cistatic uint32_t 5120bf215546Sopenharmony_ciemit_grid(struct crocus_context *ice, 5121bf215546Sopenharmony_ci struct crocus_batch *batch) 5122bf215546Sopenharmony_ci{ 5123bf215546Sopenharmony_ci UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev; 5124bf215546Sopenharmony_ci uint32_t offset = 0; 5125bf215546Sopenharmony_ci 
struct crocus_state_ref *grid_ref = &ice->state.grid_size; 5126bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, 5127bf215546Sopenharmony_ci isl_dev->ss.align, &offset); 5128bf215546Sopenharmony_ci isl_buffer_fill_state(isl_dev, surf_state, 5129bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset, 5130bf215546Sopenharmony_ci crocus_resource_bo(grid_ref->res), 5131bf215546Sopenharmony_ci grid_ref->offset, 5132bf215546Sopenharmony_ci RELOC_32BIT), 5133bf215546Sopenharmony_ci .size_B = 12, 5134bf215546Sopenharmony_ci .format = ISL_FORMAT_RAW, 5135bf215546Sopenharmony_ci .stride_B = 1, 5136bf215546Sopenharmony_ci .mocs = crocus_mocs(crocus_resource_bo(grid_ref->res), isl_dev)); 5137bf215546Sopenharmony_ci return offset; 5138bf215546Sopenharmony_ci} 5139bf215546Sopenharmony_ci 5140bf215546Sopenharmony_cistatic uint32_t 5141bf215546Sopenharmony_ciemit_ubo_buffer(struct crocus_context *ice, 5142bf215546Sopenharmony_ci struct crocus_batch *batch, 5143bf215546Sopenharmony_ci struct pipe_constant_buffer *buffer) 5144bf215546Sopenharmony_ci{ 5145bf215546Sopenharmony_ci UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev; 5146bf215546Sopenharmony_ci uint32_t offset = 0; 5147bf215546Sopenharmony_ci 5148bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, 5149bf215546Sopenharmony_ci isl_dev->ss.align, &offset); 5150bf215546Sopenharmony_ci isl_buffer_fill_state(isl_dev, surf_state, 5151bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset, 5152bf215546Sopenharmony_ci crocus_resource_bo(buffer->buffer), 5153bf215546Sopenharmony_ci buffer->buffer_offset, 5154bf215546Sopenharmony_ci RELOC_32BIT), 5155bf215546Sopenharmony_ci .size_B = buffer->buffer_size, 5156bf215546Sopenharmony_ci .format = 0, 5157bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 5158bf215546Sopenharmony_ci .stride_B = 1, 5159bf215546Sopenharmony_ci .mocs 
= crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev)); 5160bf215546Sopenharmony_ci 5161bf215546Sopenharmony_ci return offset; 5162bf215546Sopenharmony_ci} 5163bf215546Sopenharmony_ci 5164bf215546Sopenharmony_cistatic uint32_t 5165bf215546Sopenharmony_ciemit_ssbo_buffer(struct crocus_context *ice, 5166bf215546Sopenharmony_ci struct crocus_batch *batch, 5167bf215546Sopenharmony_ci struct pipe_shader_buffer *buffer, bool writeable) 5168bf215546Sopenharmony_ci{ 5169bf215546Sopenharmony_ci UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev; 5170bf215546Sopenharmony_ci uint32_t offset = 0; 5171bf215546Sopenharmony_ci uint32_t reloc = RELOC_32BIT; 5172bf215546Sopenharmony_ci 5173bf215546Sopenharmony_ci if (writeable) 5174bf215546Sopenharmony_ci reloc |= RELOC_WRITE; 5175bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, 5176bf215546Sopenharmony_ci isl_dev->ss.align, &offset); 5177bf215546Sopenharmony_ci isl_buffer_fill_state(isl_dev, surf_state, 5178bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset, 5179bf215546Sopenharmony_ci crocus_resource_bo(buffer->buffer), 5180bf215546Sopenharmony_ci buffer->buffer_offset, 5181bf215546Sopenharmony_ci reloc), 5182bf215546Sopenharmony_ci .size_B = buffer->buffer_size, 5183bf215546Sopenharmony_ci .format = ISL_FORMAT_RAW, 5184bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 5185bf215546Sopenharmony_ci .stride_B = 1, 5186bf215546Sopenharmony_ci .mocs = crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev)); 5187bf215546Sopenharmony_ci 5188bf215546Sopenharmony_ci return offset; 5189bf215546Sopenharmony_ci} 5190bf215546Sopenharmony_ci 5191bf215546Sopenharmony_cistatic uint32_t 5192bf215546Sopenharmony_ciemit_sampler_view(struct crocus_context *ice, 5193bf215546Sopenharmony_ci struct crocus_batch *batch, 5194bf215546Sopenharmony_ci bool for_gather, 5195bf215546Sopenharmony_ci struct crocus_sampler_view *isv) 5196bf215546Sopenharmony_ci{ 
5197bf215546Sopenharmony_ci UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev; 5198bf215546Sopenharmony_ci uint32_t offset = 0; 5199bf215546Sopenharmony_ci 5200bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, 5201bf215546Sopenharmony_ci isl_dev->ss.align, &offset); 5202bf215546Sopenharmony_ci 5203bf215546Sopenharmony_ci if (isv->base.target == PIPE_BUFFER) { 5204bf215546Sopenharmony_ci const struct isl_format_layout *fmtl = isl_format_get_layout(isv->view.format); 5205bf215546Sopenharmony_ci const unsigned cpp = isv->view.format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8; 5206bf215546Sopenharmony_ci unsigned final_size = 5207bf215546Sopenharmony_ci MIN3(isv->base.u.buf.size, isv->res->bo->size - isv->res->offset, 5208bf215546Sopenharmony_ci CROCUS_MAX_TEXTURE_BUFFER_SIZE * cpp); 5209bf215546Sopenharmony_ci isl_buffer_fill_state(isl_dev, surf_state, 5210bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset, 5211bf215546Sopenharmony_ci isv->res->bo, 5212bf215546Sopenharmony_ci isv->res->offset + isv->base.u.buf.offset, RELOC_32BIT), 5213bf215546Sopenharmony_ci .size_B = final_size, 5214bf215546Sopenharmony_ci .format = isv->view.format, 5215bf215546Sopenharmony_ci .swizzle = isv->view.swizzle, 5216bf215546Sopenharmony_ci .stride_B = cpp, 5217bf215546Sopenharmony_ci .mocs = crocus_mocs(isv->res->bo, isl_dev) 5218bf215546Sopenharmony_ci ); 5219bf215546Sopenharmony_ci } else { 5220bf215546Sopenharmony_ci enum isl_aux_usage aux_usage = 5221bf215546Sopenharmony_ci crocus_resource_texture_aux_usage(isv->res); 5222bf215546Sopenharmony_ci 5223bf215546Sopenharmony_ci emit_surface_state(batch, isv->res, &isv->res->surf, false, 5224bf215546Sopenharmony_ci for_gather ? 
&isv->gather_view : &isv->view, 5225bf215546Sopenharmony_ci false, aux_usage, false, 5226bf215546Sopenharmony_ci 0, surf_state, offset); 5227bf215546Sopenharmony_ci } 5228bf215546Sopenharmony_ci return offset; 5229bf215546Sopenharmony_ci} 5230bf215546Sopenharmony_ci 5231bf215546Sopenharmony_cistatic uint32_t 5232bf215546Sopenharmony_ciemit_image_view(struct crocus_context *ice, 5233bf215546Sopenharmony_ci struct crocus_batch *batch, 5234bf215546Sopenharmony_ci struct crocus_image_view *iv) 5235bf215546Sopenharmony_ci{ 5236bf215546Sopenharmony_ci UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev; 5237bf215546Sopenharmony_ci uint32_t offset = 0; 5238bf215546Sopenharmony_ci 5239bf215546Sopenharmony_ci struct crocus_resource *res = (struct crocus_resource *)iv->base.resource; 5240bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, 5241bf215546Sopenharmony_ci isl_dev->ss.align, &offset); 5242bf215546Sopenharmony_ci bool write = iv->base.shader_access & PIPE_IMAGE_ACCESS_WRITE; 5243bf215546Sopenharmony_ci uint32_t reloc = RELOC_32BIT | (write ? RELOC_WRITE : 0); 5244bf215546Sopenharmony_ci if (res->base.b.target == PIPE_BUFFER) { 5245bf215546Sopenharmony_ci const struct isl_format_layout *fmtl = isl_format_get_layout(iv->view.format); 5246bf215546Sopenharmony_ci const unsigned cpp = iv->view.format == ISL_FORMAT_RAW ? 
1 : fmtl->bpb / 8; 5247bf215546Sopenharmony_ci unsigned final_size = 5248bf215546Sopenharmony_ci MIN3(iv->base.u.buf.size, res->bo->size - res->offset - iv->base.u.buf.offset, 5249bf215546Sopenharmony_ci CROCUS_MAX_TEXTURE_BUFFER_SIZE * cpp); 5250bf215546Sopenharmony_ci isl_buffer_fill_state(isl_dev, surf_state, 5251bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset, 5252bf215546Sopenharmony_ci res->bo, 5253bf215546Sopenharmony_ci res->offset + iv->base.u.buf.offset, reloc), 5254bf215546Sopenharmony_ci .size_B = final_size, 5255bf215546Sopenharmony_ci .format = iv->view.format, 5256bf215546Sopenharmony_ci .swizzle = iv->view.swizzle, 5257bf215546Sopenharmony_ci .stride_B = cpp, 5258bf215546Sopenharmony_ci .mocs = crocus_mocs(res->bo, isl_dev) 5259bf215546Sopenharmony_ci ); 5260bf215546Sopenharmony_ci } else { 5261bf215546Sopenharmony_ci if (iv->view.format == ISL_FORMAT_RAW) { 5262bf215546Sopenharmony_ci isl_buffer_fill_state(isl_dev, surf_state, 5263bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset, 5264bf215546Sopenharmony_ci res->bo, 5265bf215546Sopenharmony_ci res->offset, reloc), 5266bf215546Sopenharmony_ci .size_B = res->bo->size - res->offset, 5267bf215546Sopenharmony_ci .format = iv->view.format, 5268bf215546Sopenharmony_ci .swizzle = iv->view.swizzle, 5269bf215546Sopenharmony_ci .stride_B = 1, 5270bf215546Sopenharmony_ci .mocs = crocus_mocs(res->bo, isl_dev), 5271bf215546Sopenharmony_ci ); 5272bf215546Sopenharmony_ci 5273bf215546Sopenharmony_ci 5274bf215546Sopenharmony_ci } else { 5275bf215546Sopenharmony_ci emit_surface_state(batch, res, 5276bf215546Sopenharmony_ci &res->surf, false, &iv->view, 5277bf215546Sopenharmony_ci write, 0, false, 5278bf215546Sopenharmony_ci 0, surf_state, offset); 5279bf215546Sopenharmony_ci } 5280bf215546Sopenharmony_ci } 5281bf215546Sopenharmony_ci 5282bf215546Sopenharmony_ci return offset; 5283bf215546Sopenharmony_ci} 
5284bf215546Sopenharmony_ci 5285bf215546Sopenharmony_ci#if GFX_VER == 6 5286bf215546Sopenharmony_cistatic uint32_t 5287bf215546Sopenharmony_ciemit_sol_surface(struct crocus_batch *batch, 5288bf215546Sopenharmony_ci struct pipe_stream_output_info *so_info, 5289bf215546Sopenharmony_ci uint32_t idx) 5290bf215546Sopenharmony_ci{ 5291bf215546Sopenharmony_ci struct crocus_context *ice = batch->ice; 5292bf215546Sopenharmony_ci 5293bf215546Sopenharmony_ci if (idx >= so_info->num_outputs || !ice->state.streamout_active) 5294bf215546Sopenharmony_ci return 0; 5295bf215546Sopenharmony_ci const struct pipe_stream_output *output = &so_info->output[idx]; 5296bf215546Sopenharmony_ci const int buffer = output->output_buffer; 5297bf215546Sopenharmony_ci assert(output->stream == 0); 5298bf215546Sopenharmony_ci 5299bf215546Sopenharmony_ci struct crocus_resource *buf = (struct crocus_resource *)ice->state.so_target[buffer]->buffer; 5300bf215546Sopenharmony_ci unsigned stride_dwords = so_info->stride[buffer]; 5301bf215546Sopenharmony_ci unsigned offset_dwords = ice->state.so_target[buffer]->buffer_offset / 4 + output->dst_offset; 5302bf215546Sopenharmony_ci 5303bf215546Sopenharmony_ci size_t size_dwords = (ice->state.so_target[buffer]->buffer_offset + ice->state.so_target[buffer]->buffer_size) / 4; 5304bf215546Sopenharmony_ci unsigned num_vector_components = output->num_components; 5305bf215546Sopenharmony_ci unsigned num_elements; 5306bf215546Sopenharmony_ci /* FIXME: can we rely on core Mesa to ensure that the buffer isn't 5307bf215546Sopenharmony_ci * too big to map using a single binding table entry? 
5308bf215546Sopenharmony_ci */ 5309bf215546Sopenharmony_ci // assert((size_dwords - offset_dwords) / stride_dwords 5310bf215546Sopenharmony_ci // <= BRW_MAX_NUM_BUFFER_ENTRIES); 5311bf215546Sopenharmony_ci 5312bf215546Sopenharmony_ci if (size_dwords > offset_dwords + num_vector_components) { 5313bf215546Sopenharmony_ci /* There is room for at least 1 transform feedback output in the buffer. 5314bf215546Sopenharmony_ci * Compute the number of additional transform feedback outputs the 5315bf215546Sopenharmony_ci * buffer has room for. 5316bf215546Sopenharmony_ci */ 5317bf215546Sopenharmony_ci num_elements = 5318bf215546Sopenharmony_ci (size_dwords - offset_dwords - num_vector_components); 5319bf215546Sopenharmony_ci } else { 5320bf215546Sopenharmony_ci /* There isn't even room for a single transform feedback output in the 5321bf215546Sopenharmony_ci * buffer. We can't configure the binding table entry to prevent output 5322bf215546Sopenharmony_ci * entirely; we'll have to rely on the geometry shader to detect 5323bf215546Sopenharmony_ci * overflow. But to minimize the damage in case of a bug, set up the 5324bf215546Sopenharmony_ci * binding table entry to just allow a single output. 
5325bf215546Sopenharmony_ci */ 5326bf215546Sopenharmony_ci num_elements = 0; 5327bf215546Sopenharmony_ci } 5328bf215546Sopenharmony_ci num_elements += stride_dwords; 5329bf215546Sopenharmony_ci 5330bf215546Sopenharmony_ci uint32_t surface_format; 5331bf215546Sopenharmony_ci switch (num_vector_components) { 5332bf215546Sopenharmony_ci case 1: 5333bf215546Sopenharmony_ci surface_format = ISL_FORMAT_R32_FLOAT; 5334bf215546Sopenharmony_ci break; 5335bf215546Sopenharmony_ci case 2: 5336bf215546Sopenharmony_ci surface_format = ISL_FORMAT_R32G32_FLOAT; 5337bf215546Sopenharmony_ci break; 5338bf215546Sopenharmony_ci case 3: 5339bf215546Sopenharmony_ci surface_format = ISL_FORMAT_R32G32B32_FLOAT; 5340bf215546Sopenharmony_ci break; 5341bf215546Sopenharmony_ci case 4: 5342bf215546Sopenharmony_ci surface_format = ISL_FORMAT_R32G32B32A32_FLOAT; 5343bf215546Sopenharmony_ci break; 5344bf215546Sopenharmony_ci default: 5345bf215546Sopenharmony_ci unreachable("Invalid vector size for transform feedback output"); 5346bf215546Sopenharmony_ci } 5347bf215546Sopenharmony_ci 5348bf215546Sopenharmony_ci UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev; 5349bf215546Sopenharmony_ci uint32_t offset = 0; 5350bf215546Sopenharmony_ci 5351bf215546Sopenharmony_ci uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, 5352bf215546Sopenharmony_ci isl_dev->ss.align, &offset); 5353bf215546Sopenharmony_ci isl_buffer_fill_state(isl_dev, surf_state, 5354bf215546Sopenharmony_ci .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset, 5355bf215546Sopenharmony_ci crocus_resource_bo(&buf->base.b), 5356bf215546Sopenharmony_ci offset_dwords * 4, RELOC_32BIT|RELOC_WRITE), 5357bf215546Sopenharmony_ci .size_B = num_elements * 4, 5358bf215546Sopenharmony_ci .stride_B = stride_dwords * 4, 5359bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 5360bf215546Sopenharmony_ci .format = surface_format); 5361bf215546Sopenharmony_ci return offset; 5362bf215546Sopenharmony_ci} 
5363bf215546Sopenharmony_ci#endif 5364bf215546Sopenharmony_ci 5365bf215546Sopenharmony_ci#define foreach_surface_used(index, group) \ 5366bf215546Sopenharmony_ci for (int index = 0; index < bt->sizes[group]; index++) \ 5367bf215546Sopenharmony_ci if (crocus_group_index_to_bti(bt, group, index) != \ 5368bf215546Sopenharmony_ci CROCUS_SURFACE_NOT_USED) 5369bf215546Sopenharmony_ci 5370bf215546Sopenharmony_cistatic void 5371bf215546Sopenharmony_cicrocus_populate_binding_table(struct crocus_context *ice, 5372bf215546Sopenharmony_ci struct crocus_batch *batch, 5373bf215546Sopenharmony_ci gl_shader_stage stage, bool ff_gs) 5374bf215546Sopenharmony_ci{ 5375bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ff_gs ? ice->shaders.ff_gs_prog : ice->shaders.prog[stage]; 5376bf215546Sopenharmony_ci struct crocus_shader_state *shs = ff_gs ? NULL : &ice->state.shaders[stage]; 5377bf215546Sopenharmony_ci if (!shader) 5378bf215546Sopenharmony_ci return; 5379bf215546Sopenharmony_ci 5380bf215546Sopenharmony_ci struct crocus_binding_table *bt = &shader->bt; 5381bf215546Sopenharmony_ci int s = 0; 5382bf215546Sopenharmony_ci uint32_t *surf_offsets = shader->surf_offset; 5383bf215546Sopenharmony_ci 5384bf215546Sopenharmony_ci#if GFX_VER < 8 5385bf215546Sopenharmony_ci const struct shader_info *info = crocus_get_shader_info(ice, stage); 5386bf215546Sopenharmony_ci#endif 5387bf215546Sopenharmony_ci 5388bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT) { 5389bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 5390bf215546Sopenharmony_ci /* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. 
*/ 5391bf215546Sopenharmony_ci if (cso_fb->nr_cbufs) { 5392bf215546Sopenharmony_ci for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { 5393bf215546Sopenharmony_ci uint32_t write_disables = 0; 5394bf215546Sopenharmony_ci bool blend_enable = false; 5395bf215546Sopenharmony_ci#if GFX_VER <= 5 5396bf215546Sopenharmony_ci const struct pipe_rt_blend_state *rt = 5397bf215546Sopenharmony_ci &ice->state.cso_blend->cso.rt[ice->state.cso_blend->cso.independent_blend_enable ? i : 0]; 5398bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT]; 5399bf215546Sopenharmony_ci struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; 5400bf215546Sopenharmony_ci write_disables |= (rt->colormask & PIPE_MASK_A) ? 0x0 : 0x8; 5401bf215546Sopenharmony_ci write_disables |= (rt->colormask & PIPE_MASK_R) ? 0x0 : 0x4; 5402bf215546Sopenharmony_ci write_disables |= (rt->colormask & PIPE_MASK_G) ? 0x0 : 0x2; 5403bf215546Sopenharmony_ci write_disables |= (rt->colormask & PIPE_MASK_B) ? 0x0 : 0x1; 5404bf215546Sopenharmony_ci /* Gen4/5 can't handle blending off when a dual src blend wm is enabled. 
*/ 5405bf215546Sopenharmony_ci blend_enable = rt->blend_enable || wm_prog_data->dual_src_blend; 5406bf215546Sopenharmony_ci#endif 5407bf215546Sopenharmony_ci if (cso_fb->cbufs[i]) { 5408bf215546Sopenharmony_ci surf_offsets[s] = emit_surface(batch, 5409bf215546Sopenharmony_ci (struct crocus_surface *)cso_fb->cbufs[i], 5410bf215546Sopenharmony_ci ice->state.draw_aux_usage[i], 5411bf215546Sopenharmony_ci blend_enable, 5412bf215546Sopenharmony_ci write_disables); 5413bf215546Sopenharmony_ci } else { 5414bf215546Sopenharmony_ci emit_null_fb_surface(batch, ice, &surf_offsets[s]); 5415bf215546Sopenharmony_ci } 5416bf215546Sopenharmony_ci s++; 5417bf215546Sopenharmony_ci } 5418bf215546Sopenharmony_ci } else { 5419bf215546Sopenharmony_ci emit_null_fb_surface(batch, ice, &surf_offsets[s]); 5420bf215546Sopenharmony_ci s++; 5421bf215546Sopenharmony_ci } 5422bf215546Sopenharmony_ci 5423bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_RENDER_TARGET_READ) { 5424bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 5425bf215546Sopenharmony_ci if (cso_fb->cbufs[i]) { 5426bf215546Sopenharmony_ci surf_offsets[s++] = emit_rt_surface(batch, 5427bf215546Sopenharmony_ci (struct crocus_surface *)cso_fb->cbufs[i], 5428bf215546Sopenharmony_ci ice->state.draw_aux_usage[i]); 5429bf215546Sopenharmony_ci } 5430bf215546Sopenharmony_ci } 5431bf215546Sopenharmony_ci } 5432bf215546Sopenharmony_ci 5433bf215546Sopenharmony_ci if (stage == MESA_SHADER_COMPUTE) { 5434bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_CS_WORK_GROUPS) { 5435bf215546Sopenharmony_ci surf_offsets[s] = emit_grid(ice, batch); 5436bf215546Sopenharmony_ci s++; 5437bf215546Sopenharmony_ci } 5438bf215546Sopenharmony_ci } 5439bf215546Sopenharmony_ci 5440bf215546Sopenharmony_ci#if GFX_VER == 6 5441bf215546Sopenharmony_ci if (stage == MESA_SHADER_GEOMETRY) { 5442bf215546Sopenharmony_ci struct pipe_stream_output_info *so_info; 5443bf215546Sopenharmony_ci if 
(ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) 5444bf215546Sopenharmony_ci so_info = &ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]->stream_output; 5445bf215546Sopenharmony_ci else 5446bf215546Sopenharmony_ci so_info = &ice->shaders.uncompiled[MESA_SHADER_VERTEX]->stream_output; 5447bf215546Sopenharmony_ci 5448bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_SOL) { 5449bf215546Sopenharmony_ci surf_offsets[s] = emit_sol_surface(batch, so_info, i); 5450bf215546Sopenharmony_ci s++; 5451bf215546Sopenharmony_ci } 5452bf215546Sopenharmony_ci } 5453bf215546Sopenharmony_ci#endif 5454bf215546Sopenharmony_ci 5455bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE) { 5456bf215546Sopenharmony_ci struct crocus_sampler_view *view = shs->textures[i]; 5457bf215546Sopenharmony_ci if (view) 5458bf215546Sopenharmony_ci surf_offsets[s] = emit_sampler_view(ice, batch, false, view); 5459bf215546Sopenharmony_ci else 5460bf215546Sopenharmony_ci emit_null_surface(batch, &surf_offsets[s]); 5461bf215546Sopenharmony_ci s++; 5462bf215546Sopenharmony_ci } 5463bf215546Sopenharmony_ci 5464bf215546Sopenharmony_ci#if GFX_VER < 8 5465bf215546Sopenharmony_ci if (info && info->uses_texture_gather) { 5466bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE_GATHER) { 5467bf215546Sopenharmony_ci struct crocus_sampler_view *view = shs->textures[i]; 5468bf215546Sopenharmony_ci if (view) 5469bf215546Sopenharmony_ci surf_offsets[s] = emit_sampler_view(ice, batch, true, view); 5470bf215546Sopenharmony_ci else 5471bf215546Sopenharmony_ci emit_null_surface(batch, &surf_offsets[s]); 5472bf215546Sopenharmony_ci s++; 5473bf215546Sopenharmony_ci } 5474bf215546Sopenharmony_ci } 5475bf215546Sopenharmony_ci#endif 5476bf215546Sopenharmony_ci 5477bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_IMAGE) { 5478bf215546Sopenharmony_ci struct crocus_image_view *view = &shs->image[i]; 5479bf215546Sopenharmony_ci if (view->base.resource) 
5480bf215546Sopenharmony_ci surf_offsets[s] = emit_image_view(ice, batch, view); 5481bf215546Sopenharmony_ci else 5482bf215546Sopenharmony_ci emit_null_surface(batch, &surf_offsets[s]); 5483bf215546Sopenharmony_ci s++; 5484bf215546Sopenharmony_ci } 5485bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_UBO) { 5486bf215546Sopenharmony_ci if (shs->constbufs[i].buffer) 5487bf215546Sopenharmony_ci surf_offsets[s] = emit_ubo_buffer(ice, batch, &shs->constbufs[i]); 5488bf215546Sopenharmony_ci else 5489bf215546Sopenharmony_ci emit_null_surface(batch, &surf_offsets[s]); 5490bf215546Sopenharmony_ci s++; 5491bf215546Sopenharmony_ci } 5492bf215546Sopenharmony_ci foreach_surface_used(i, CROCUS_SURFACE_GROUP_SSBO) { 5493bf215546Sopenharmony_ci if (shs->ssbo[i].buffer) 5494bf215546Sopenharmony_ci surf_offsets[s] = emit_ssbo_buffer(ice, batch, &shs->ssbo[i], 5495bf215546Sopenharmony_ci !!(shs->writable_ssbos & (1 << i))); 5496bf215546Sopenharmony_ci else 5497bf215546Sopenharmony_ci emit_null_surface(batch, &surf_offsets[s]); 5498bf215546Sopenharmony_ci s++; 5499bf215546Sopenharmony_ci } 5500bf215546Sopenharmony_ci 5501bf215546Sopenharmony_ci} 5502bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */ 5503bf215546Sopenharmony_cistatic uint32_t 5504bf215546Sopenharmony_cicrocus_upload_binding_table(struct crocus_context *ice, 5505bf215546Sopenharmony_ci struct crocus_batch *batch, 5506bf215546Sopenharmony_ci uint32_t *table, 5507bf215546Sopenharmony_ci uint32_t size) 5508bf215546Sopenharmony_ci 5509bf215546Sopenharmony_ci{ 5510bf215546Sopenharmony_ci if (size == 0) 5511bf215546Sopenharmony_ci return 0; 5512bf215546Sopenharmony_ci return emit_state(batch, table, size, 32); 5513bf215546Sopenharmony_ci} 5514bf215546Sopenharmony_ci 5515bf215546Sopenharmony_ci/** 5516bf215546Sopenharmony_ci * Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address. 
5517bf215546Sopenharmony_ci */ 5518bf215546Sopenharmony_ci 5519bf215546Sopenharmony_cistatic void 5520bf215546Sopenharmony_cicrocus_update_surface_base_address(struct crocus_batch *batch) 5521bf215546Sopenharmony_ci{ 5522bf215546Sopenharmony_ci if (batch->state_base_address_emitted) 5523bf215546Sopenharmony_ci return; 5524bf215546Sopenharmony_ci 5525bf215546Sopenharmony_ci UNUSED uint32_t mocs = batch->screen->isl_dev.mocs.internal; 5526bf215546Sopenharmony_ci 5527bf215546Sopenharmony_ci flush_before_state_base_change(batch); 5528bf215546Sopenharmony_ci 5529bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { 5530bf215546Sopenharmony_ci /* Set base addresses */ 5531bf215546Sopenharmony_ci sba.GeneralStateBaseAddressModifyEnable = true; 5532bf215546Sopenharmony_ci 5533bf215546Sopenharmony_ci#if GFX_VER >= 6 5534bf215546Sopenharmony_ci sba.DynamicStateBaseAddressModifyEnable = true; 5535bf215546Sopenharmony_ci sba.DynamicStateBaseAddress = ro_bo(batch->state.bo, 0); 5536bf215546Sopenharmony_ci#endif 5537bf215546Sopenharmony_ci 5538bf215546Sopenharmony_ci sba.SurfaceStateBaseAddressModifyEnable = true; 5539bf215546Sopenharmony_ci sba.SurfaceStateBaseAddress = ro_bo(batch->state.bo, 0); 5540bf215546Sopenharmony_ci 5541bf215546Sopenharmony_ci sba.IndirectObjectBaseAddressModifyEnable = true; 5542bf215546Sopenharmony_ci 5543bf215546Sopenharmony_ci#if GFX_VER >= 5 5544bf215546Sopenharmony_ci sba.InstructionBaseAddressModifyEnable = true; 5545bf215546Sopenharmony_ci sba.InstructionBaseAddress = ro_bo(batch->ice->shaders.cache_bo, 0); // TODO! 
5546bf215546Sopenharmony_ci#endif 5547bf215546Sopenharmony_ci 5548bf215546Sopenharmony_ci /* Set buffer sizes on Gen8+ or upper bounds on Gen4-7 */ 5549bf215546Sopenharmony_ci#if GFX_VER == 8 5550bf215546Sopenharmony_ci sba.GeneralStateBufferSize = 0xfffff; 5551bf215546Sopenharmony_ci sba.IndirectObjectBufferSize = 0xfffff; 5552bf215546Sopenharmony_ci sba.InstructionBufferSize = 0xfffff; 5553bf215546Sopenharmony_ci sba.DynamicStateBufferSize = MAX_STATE_SIZE; 5554bf215546Sopenharmony_ci 5555bf215546Sopenharmony_ci sba.GeneralStateBufferSizeModifyEnable = true; 5556bf215546Sopenharmony_ci sba.DynamicStateBufferSizeModifyEnable = true; 5557bf215546Sopenharmony_ci sba.IndirectObjectBufferSizeModifyEnable = true; 5558bf215546Sopenharmony_ci sba.InstructionBuffersizeModifyEnable = true; 5559bf215546Sopenharmony_ci#else 5560bf215546Sopenharmony_ci sba.GeneralStateAccessUpperBoundModifyEnable = true; 5561bf215546Sopenharmony_ci sba.IndirectObjectAccessUpperBoundModifyEnable = true; 5562bf215546Sopenharmony_ci 5563bf215546Sopenharmony_ci#if GFX_VER >= 5 5564bf215546Sopenharmony_ci sba.InstructionAccessUpperBoundModifyEnable = true; 5565bf215546Sopenharmony_ci#endif 5566bf215546Sopenharmony_ci 5567bf215546Sopenharmony_ci#if GFX_VER >= 6 5568bf215546Sopenharmony_ci /* Dynamic state upper bound. Although the documentation says that 5569bf215546Sopenharmony_ci * programming it to zero will cause it to be ignored, that is a lie. 5570bf215546Sopenharmony_ci * If this isn't programmed to a real bound, the sampler border color 5571bf215546Sopenharmony_ci * pointer is rejected, causing border color to mysteriously fail. 
5572bf215546Sopenharmony_ci */ 5573bf215546Sopenharmony_ci sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000); 5574bf215546Sopenharmony_ci sba.DynamicStateAccessUpperBoundModifyEnable = true; 5575bf215546Sopenharmony_ci#else 5576bf215546Sopenharmony_ci /* Same idea but using General State Base Address on Gen4-5 */ 5577bf215546Sopenharmony_ci sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000); 5578bf215546Sopenharmony_ci#endif 5579bf215546Sopenharmony_ci#endif 5580bf215546Sopenharmony_ci 5581bf215546Sopenharmony_ci#if GFX_VER >= 6 5582bf215546Sopenharmony_ci /* The hardware appears to pay attention to the MOCS fields even 5583bf215546Sopenharmony_ci * if you don't set the "Address Modify Enable" bit for the base. 5584bf215546Sopenharmony_ci */ 5585bf215546Sopenharmony_ci sba.GeneralStateMOCS = mocs; 5586bf215546Sopenharmony_ci sba.StatelessDataPortAccessMOCS = mocs; 5587bf215546Sopenharmony_ci sba.DynamicStateMOCS = mocs; 5588bf215546Sopenharmony_ci sba.IndirectObjectMOCS = mocs; 5589bf215546Sopenharmony_ci sba.InstructionMOCS = mocs; 5590bf215546Sopenharmony_ci sba.SurfaceStateMOCS = mocs; 5591bf215546Sopenharmony_ci#endif 5592bf215546Sopenharmony_ci } 5593bf215546Sopenharmony_ci 5594bf215546Sopenharmony_ci flush_after_state_base_change(batch); 5595bf215546Sopenharmony_ci 5596bf215546Sopenharmony_ci /* According to section 3.6.1 of VOL1 of the 965 PRM, 5597bf215546Sopenharmony_ci * STATE_BASE_ADDRESS updates require a reissue of: 5598bf215546Sopenharmony_ci * 5599bf215546Sopenharmony_ci * 3DSTATE_PIPELINE_POINTERS 5600bf215546Sopenharmony_ci * 3DSTATE_BINDING_TABLE_POINTERS 5601bf215546Sopenharmony_ci * MEDIA_STATE_POINTERS 5602bf215546Sopenharmony_ci * 5603bf215546Sopenharmony_ci * and this continues through Ironlake. 
The Sandy Bridge PRM, vol 5604bf215546Sopenharmony_ci * 1 part 1 says that the folowing packets must be reissued: 5605bf215546Sopenharmony_ci * 5606bf215546Sopenharmony_ci * 3DSTATE_CC_POINTERS 5607bf215546Sopenharmony_ci * 3DSTATE_BINDING_TABLE_POINTERS 5608bf215546Sopenharmony_ci * 3DSTATE_SAMPLER_STATE_POINTERS 5609bf215546Sopenharmony_ci * 3DSTATE_VIEWPORT_STATE_POINTERS 5610bf215546Sopenharmony_ci * MEDIA_STATE_POINTERS 5611bf215546Sopenharmony_ci * 5612bf215546Sopenharmony_ci * Those are always reissued following SBA updates anyway (new 5613bf215546Sopenharmony_ci * batch time), except in the case of the program cache BO 5614bf215546Sopenharmony_ci * changing. Having a separate state flag makes the sequence more 5615bf215546Sopenharmony_ci * obvious. 5616bf215546Sopenharmony_ci */ 5617bf215546Sopenharmony_ci#if GFX_VER <= 5 5618bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS | CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS; 5619bf215546Sopenharmony_ci#elif GFX_VER == 6 5620bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS | CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS; 5621bf215546Sopenharmony_ci#endif 5622bf215546Sopenharmony_ci batch->state_base_address_emitted = true; 5623bf215546Sopenharmony_ci} 5624bf215546Sopenharmony_ci 5625bf215546Sopenharmony_cistatic inline void 5626bf215546Sopenharmony_cicrocus_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz, 5627bf215546Sopenharmony_ci bool window_space_position, float *zmin, float *zmax) 5628bf215546Sopenharmony_ci{ 5629bf215546Sopenharmony_ci if (window_space_position) { 5630bf215546Sopenharmony_ci *zmin = 0.f; 5631bf215546Sopenharmony_ci *zmax = 1.f; 5632bf215546Sopenharmony_ci return; 5633bf215546Sopenharmony_ci } 5634bf215546Sopenharmony_ci util_viewport_zmin_zmax(vp, halfz, zmin, zmax); 5635bf215546Sopenharmony_ci} 5636bf215546Sopenharmony_ci 5637bf215546Sopenharmony_cistruct push_bos { 5638bf215546Sopenharmony_ci 
struct { 5639bf215546Sopenharmony_ci struct crocus_address addr; 5640bf215546Sopenharmony_ci uint32_t length; 5641bf215546Sopenharmony_ci } buffers[4]; 5642bf215546Sopenharmony_ci int buffer_count; 5643bf215546Sopenharmony_ci uint32_t max_length; 5644bf215546Sopenharmony_ci}; 5645bf215546Sopenharmony_ci 5646bf215546Sopenharmony_ci#if GFX_VER >= 6 5647bf215546Sopenharmony_cistatic void 5648bf215546Sopenharmony_cisetup_constant_buffers(struct crocus_context *ice, 5649bf215546Sopenharmony_ci struct crocus_batch *batch, 5650bf215546Sopenharmony_ci int stage, 5651bf215546Sopenharmony_ci struct push_bos *push_bos) 5652bf215546Sopenharmony_ci{ 5653bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 5654bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[stage]; 5655bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; 5656bf215546Sopenharmony_ci 5657bf215546Sopenharmony_ci uint32_t push_range_sum = 0; 5658bf215546Sopenharmony_ci 5659bf215546Sopenharmony_ci int n = 0; 5660bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 5661bf215546Sopenharmony_ci const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; 5662bf215546Sopenharmony_ci 5663bf215546Sopenharmony_ci if (range->length == 0) 5664bf215546Sopenharmony_ci continue; 5665bf215546Sopenharmony_ci 5666bf215546Sopenharmony_ci push_range_sum += range->length; 5667bf215546Sopenharmony_ci 5668bf215546Sopenharmony_ci if (range->length > push_bos->max_length) 5669bf215546Sopenharmony_ci push_bos->max_length = range->length; 5670bf215546Sopenharmony_ci 5671bf215546Sopenharmony_ci /* Range block is a binding table index, map back to UBO index. 
*/ 5672bf215546Sopenharmony_ci unsigned block_index = crocus_bti_to_group_index( 5673bf215546Sopenharmony_ci &shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block); 5674bf215546Sopenharmony_ci assert(block_index != CROCUS_SURFACE_NOT_USED); 5675bf215546Sopenharmony_ci 5676bf215546Sopenharmony_ci struct pipe_constant_buffer *cbuf = &shs->constbufs[block_index]; 5677bf215546Sopenharmony_ci struct crocus_resource *res = (void *) cbuf->buffer; 5678bf215546Sopenharmony_ci 5679bf215546Sopenharmony_ci assert(cbuf->buffer_offset % 32 == 0); 5680bf215546Sopenharmony_ci 5681bf215546Sopenharmony_ci push_bos->buffers[n].length = range->length; 5682bf215546Sopenharmony_ci push_bos->buffers[n].addr = 5683bf215546Sopenharmony_ci res ? ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset) 5684bf215546Sopenharmony_ci : ro_bo(batch->ice->workaround_bo, 5685bf215546Sopenharmony_ci batch->ice->workaround_offset); 5686bf215546Sopenharmony_ci n++; 5687bf215546Sopenharmony_ci } 5688bf215546Sopenharmony_ci 5689bf215546Sopenharmony_ci /* From the 3DSTATE_CONSTANT_XS and 3DSTATE_CONSTANT_ALL programming notes: 5690bf215546Sopenharmony_ci * 5691bf215546Sopenharmony_ci * "The sum of all four read length fields must be less than or 5692bf215546Sopenharmony_ci * equal to the size of 64." 
5693bf215546Sopenharmony_ci */ 5694bf215546Sopenharmony_ci assert(push_range_sum <= 64); 5695bf215546Sopenharmony_ci 5696bf215546Sopenharmony_ci push_bos->buffer_count = n; 5697bf215546Sopenharmony_ci} 5698bf215546Sopenharmony_ci 5699bf215546Sopenharmony_ci#if GFX_VER == 7 5700bf215546Sopenharmony_cistatic void 5701bf215546Sopenharmony_cigen7_emit_vs_workaround_flush(struct crocus_batch *batch) 5702bf215546Sopenharmony_ci{ 5703bf215546Sopenharmony_ci crocus_emit_pipe_control_write(batch, 5704bf215546Sopenharmony_ci "vs workaround", 5705bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_IMMEDIATE 5706bf215546Sopenharmony_ci | PIPE_CONTROL_DEPTH_STALL, 5707bf215546Sopenharmony_ci batch->ice->workaround_bo, 5708bf215546Sopenharmony_ci batch->ice->workaround_offset, 0); 5709bf215546Sopenharmony_ci} 5710bf215546Sopenharmony_ci#endif 5711bf215546Sopenharmony_ci 5712bf215546Sopenharmony_cistatic void 5713bf215546Sopenharmony_ciemit_push_constant_packets(struct crocus_context *ice, 5714bf215546Sopenharmony_ci struct crocus_batch *batch, 5715bf215546Sopenharmony_ci int stage, 5716bf215546Sopenharmony_ci const struct push_bos *push_bos) 5717bf215546Sopenharmony_ci{ 5718bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[stage]; 5719bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data = shader ? 
(void *) shader->prog_data : NULL; 5720bf215546Sopenharmony_ci UNUSED uint32_t mocs = crocus_mocs(NULL, &batch->screen->isl_dev); 5721bf215546Sopenharmony_ci 5722bf215546Sopenharmony_ci#if GFX_VER == 7 5723bf215546Sopenharmony_ci if (stage == MESA_SHADER_VERTEX) { 5724bf215546Sopenharmony_ci if (batch->screen->devinfo.platform == INTEL_PLATFORM_IVB) 5725bf215546Sopenharmony_ci gen7_emit_vs_workaround_flush(batch); 5726bf215546Sopenharmony_ci } 5727bf215546Sopenharmony_ci#endif 5728bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) { 5729bf215546Sopenharmony_ci pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; 5730bf215546Sopenharmony_ci#if GFX_VER >= 7 5731bf215546Sopenharmony_ci#if GFX_VER != 8 5732bf215546Sopenharmony_ci /* MOCS is MBZ on Gen8 so we skip it there */ 5733bf215546Sopenharmony_ci pkt.ConstantBody.MOCS = mocs; 5734bf215546Sopenharmony_ci#endif 5735bf215546Sopenharmony_ci 5736bf215546Sopenharmony_ci if (prog_data) { 5737bf215546Sopenharmony_ci /* The Skylake PRM contains the following restriction: 5738bf215546Sopenharmony_ci * 5739bf215546Sopenharmony_ci * "The driver must ensure The following case does not occur 5740bf215546Sopenharmony_ci * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with 5741bf215546Sopenharmony_ci * buffer 3 read length equal to zero committed followed by a 5742bf215546Sopenharmony_ci * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to 5743bf215546Sopenharmony_ci * zero committed." 5744bf215546Sopenharmony_ci * 5745bf215546Sopenharmony_ci * To avoid this, we program the buffers in the highest slots. 5746bf215546Sopenharmony_ci * This way, slot 0 is only used if slot 3 is also used. 
5747bf215546Sopenharmony_ci */ 5748bf215546Sopenharmony_ci int n = push_bos->buffer_count; 5749bf215546Sopenharmony_ci assert(n <= 4); 5750bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 5751bf215546Sopenharmony_ci const unsigned shift = 4 - n; 5752bf215546Sopenharmony_ci#else 5753bf215546Sopenharmony_ci const unsigned shift = 0; 5754bf215546Sopenharmony_ci#endif 5755bf215546Sopenharmony_ci for (int i = 0; i < n; i++) { 5756bf215546Sopenharmony_ci pkt.ConstantBody.ReadLength[i + shift] = 5757bf215546Sopenharmony_ci push_bos->buffers[i].length; 5758bf215546Sopenharmony_ci pkt.ConstantBody.Buffer[i + shift] = push_bos->buffers[i].addr; 5759bf215546Sopenharmony_ci } 5760bf215546Sopenharmony_ci } 5761bf215546Sopenharmony_ci#else 5762bf215546Sopenharmony_ci if (prog_data) { 5763bf215546Sopenharmony_ci int n = push_bos->buffer_count; 5764bf215546Sopenharmony_ci assert (n <= 1); 5765bf215546Sopenharmony_ci if (n == 1) { 5766bf215546Sopenharmony_ci pkt.Buffer0Valid = true; 5767bf215546Sopenharmony_ci pkt.ConstantBody.PointertoConstantBuffer0 = push_bos->buffers[0].addr.offset; 5768bf215546Sopenharmony_ci pkt.ConstantBody.ConstantBuffer0ReadLength = push_bos->buffers[0].length - 1; 5769bf215546Sopenharmony_ci } 5770bf215546Sopenharmony_ci } 5771bf215546Sopenharmony_ci#endif 5772bf215546Sopenharmony_ci } 5773bf215546Sopenharmony_ci} 5774bf215546Sopenharmony_ci 5775bf215546Sopenharmony_ci#endif 5776bf215546Sopenharmony_ci 5777bf215546Sopenharmony_ci#if GFX_VER == 8 5778bf215546Sopenharmony_citypedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML; 5779bf215546Sopenharmony_ci#elif GFX_VER >= 6 5780bf215546Sopenharmony_citypedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML; 5781bf215546Sopenharmony_ci#else 5782bf215546Sopenharmony_citypedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML; 5783bf215546Sopenharmony_ci#endif 5784bf215546Sopenharmony_ci 5785bf215546Sopenharmony_cistatic inline void 5786bf215546Sopenharmony_ciset_depth_stencil_bits(struct 
crocus_context *ice, DEPTH_STENCIL_GENXML *ds) 5787bf215546Sopenharmony_ci{ 5788bf215546Sopenharmony_ci struct crocus_depth_stencil_alpha_state *cso = ice->state.cso_zsa; 5789bf215546Sopenharmony_ci ds->DepthTestEnable = cso->cso.depth_enabled; 5790bf215546Sopenharmony_ci ds->DepthBufferWriteEnable = cso->cso.depth_writemask; 5791bf215546Sopenharmony_ci ds->DepthTestFunction = translate_compare_func(cso->cso.depth_func); 5792bf215546Sopenharmony_ci 5793bf215546Sopenharmony_ci ds->StencilFailOp = cso->cso.stencil[0].fail_op; 5794bf215546Sopenharmony_ci ds->StencilPassDepthFailOp = cso->cso.stencil[0].zfail_op; 5795bf215546Sopenharmony_ci ds->StencilPassDepthPassOp = cso->cso.stencil[0].zpass_op; 5796bf215546Sopenharmony_ci ds->StencilTestFunction = translate_compare_func(cso->cso.stencil[0].func); 5797bf215546Sopenharmony_ci 5798bf215546Sopenharmony_ci ds->StencilTestMask = cso->cso.stencil[0].valuemask; 5799bf215546Sopenharmony_ci ds->StencilWriteMask = cso->cso.stencil[0].writemask; 5800bf215546Sopenharmony_ci 5801bf215546Sopenharmony_ci ds->BackfaceStencilFailOp = cso->cso.stencil[1].fail_op; 5802bf215546Sopenharmony_ci ds->BackfaceStencilPassDepthFailOp = cso->cso.stencil[1].zfail_op; 5803bf215546Sopenharmony_ci ds->BackfaceStencilPassDepthPassOp = cso->cso.stencil[1].zpass_op; 5804bf215546Sopenharmony_ci ds->BackfaceStencilTestFunction = translate_compare_func(cso->cso.stencil[1].func); 5805bf215546Sopenharmony_ci 5806bf215546Sopenharmony_ci ds->BackfaceStencilTestMask = cso->cso.stencil[1].valuemask; 5807bf215546Sopenharmony_ci ds->BackfaceStencilWriteMask = cso->cso.stencil[1].writemask; 5808bf215546Sopenharmony_ci ds->DoubleSidedStencilEnable = cso->cso.stencil[1].enabled; 5809bf215546Sopenharmony_ci ds->StencilTestEnable = cso->cso.stencil[0].enabled; 5810bf215546Sopenharmony_ci ds->StencilBufferWriteEnable = 5811bf215546Sopenharmony_ci cso->cso.stencil[0].writemask != 0 || 5812bf215546Sopenharmony_ci (cso->cso.stencil[1].enabled && 
cso->cso.stencil[1].writemask != 0); 5813bf215546Sopenharmony_ci} 5814bf215546Sopenharmony_ci 5815bf215546Sopenharmony_cistatic void 5816bf215546Sopenharmony_ciemit_vertex_buffer_state(struct crocus_batch *batch, 5817bf215546Sopenharmony_ci unsigned buffer_id, 5818bf215546Sopenharmony_ci struct crocus_bo *bo, 5819bf215546Sopenharmony_ci unsigned start_offset, 5820bf215546Sopenharmony_ci unsigned end_offset, 5821bf215546Sopenharmony_ci unsigned stride, 5822bf215546Sopenharmony_ci unsigned step_rate, 5823bf215546Sopenharmony_ci uint32_t **map) 5824bf215546Sopenharmony_ci{ 5825bf215546Sopenharmony_ci const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); 5826bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(VERTEX_BUFFER_STATE), *map, vb) { 5827bf215546Sopenharmony_ci vb.BufferStartingAddress = ro_bo(bo, start_offset); 5828bf215546Sopenharmony_ci#if GFX_VER >= 8 5829bf215546Sopenharmony_ci vb.BufferSize = end_offset - start_offset; 5830bf215546Sopenharmony_ci#endif 5831bf215546Sopenharmony_ci vb.VertexBufferIndex = buffer_id; 5832bf215546Sopenharmony_ci vb.BufferPitch = stride; 5833bf215546Sopenharmony_ci#if GFX_VER >= 7 5834bf215546Sopenharmony_ci vb.AddressModifyEnable = true; 5835bf215546Sopenharmony_ci#endif 5836bf215546Sopenharmony_ci#if GFX_VER >= 6 5837bf215546Sopenharmony_ci vb.MOCS = crocus_mocs(bo, &batch->screen->isl_dev); 5838bf215546Sopenharmony_ci#endif 5839bf215546Sopenharmony_ci#if GFX_VER < 8 5840bf215546Sopenharmony_ci vb.BufferAccessType = step_rate ? 
INSTANCEDATA : VERTEXDATA; 5841bf215546Sopenharmony_ci vb.InstanceDataStepRate = step_rate; 5842bf215546Sopenharmony_ci#if GFX_VER >= 5 5843bf215546Sopenharmony_ci vb.EndAddress = ro_bo(bo, end_offset - 1); 5844bf215546Sopenharmony_ci#endif 5845bf215546Sopenharmony_ci#endif 5846bf215546Sopenharmony_ci } 5847bf215546Sopenharmony_ci *map += vb_dwords; 5848bf215546Sopenharmony_ci} 5849bf215546Sopenharmony_ci 5850bf215546Sopenharmony_ci#if GFX_VER >= 6 5851bf215546Sopenharmony_cistatic uint32_t 5852bf215546Sopenharmony_cidetermine_sample_mask(struct crocus_context *ice) 5853bf215546Sopenharmony_ci{ 5854bf215546Sopenharmony_ci uint32_t num_samples = ice->state.framebuffer.samples; 5855bf215546Sopenharmony_ci 5856bf215546Sopenharmony_ci if (num_samples <= 1) 5857bf215546Sopenharmony_ci return 1; 5858bf215546Sopenharmony_ci 5859bf215546Sopenharmony_ci uint32_t fb_mask = (1 << num_samples) - 1; 5860bf215546Sopenharmony_ci return ice->state.sample_mask & fb_mask; 5861bf215546Sopenharmony_ci} 5862bf215546Sopenharmony_ci#endif 5863bf215546Sopenharmony_ci 5864bf215546Sopenharmony_cistatic void 5865bf215546Sopenharmony_cicrocus_upload_dirty_render_state(struct crocus_context *ice, 5866bf215546Sopenharmony_ci struct crocus_batch *batch, 5867bf215546Sopenharmony_ci const struct pipe_draw_info *draw) 5868bf215546Sopenharmony_ci{ 5869bf215546Sopenharmony_ci uint64_t dirty = ice->state.dirty; 5870bf215546Sopenharmony_ci uint64_t stage_dirty = ice->state.stage_dirty; 5871bf215546Sopenharmony_ci 5872bf215546Sopenharmony_ci if (!(dirty & CROCUS_ALL_DIRTY_FOR_RENDER) && 5873bf215546Sopenharmony_ci !(stage_dirty & CROCUS_ALL_STAGE_DIRTY_FOR_RENDER)) 5874bf215546Sopenharmony_ci return; 5875bf215546Sopenharmony_ci 5876bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_VF_STATISTICS) { 5877bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf) { 5878bf215546Sopenharmony_ci vf.StatisticsEnable = true; 5879bf215546Sopenharmony_ci } 5880bf215546Sopenharmony_ci } 
5881bf215546Sopenharmony_ci 5882bf215546Sopenharmony_ci#if GFX_VER <= 5 5883bf215546Sopenharmony_ci if (stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS | 5884bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CONSTANTS_FS)) { 5885bf215546Sopenharmony_ci bool ret = calculate_curbe_offsets(batch); 5886bf215546Sopenharmony_ci if (ret) { 5887bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN4_CURBE | CROCUS_DIRTY_WM | CROCUS_DIRTY_CLIP; 5888bf215546Sopenharmony_ci stage_dirty |= CROCUS_STAGE_DIRTY_VS; 5889bf215546Sopenharmony_ci } 5890bf215546Sopenharmony_ci } 5891bf215546Sopenharmony_ci 5892bf215546Sopenharmony_ci if (dirty & (CROCUS_DIRTY_GEN4_CURBE | CROCUS_DIRTY_RASTER) || 5893bf215546Sopenharmony_ci stage_dirty & CROCUS_STAGE_DIRTY_VS) { 5894bf215546Sopenharmony_ci bool ret = crocus_calculate_urb_fence(batch, ice->curbe.total_size, 5895bf215546Sopenharmony_ci brw_vue_prog_data(ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data)->urb_entry_size, 5896bf215546Sopenharmony_ci ((struct brw_sf_prog_data *)ice->shaders.sf_prog->prog_data)->urb_entry_size); 5897bf215546Sopenharmony_ci if (ret) { 5898bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS | CROCUS_DIRTY_RASTER | CROCUS_DIRTY_CLIP; 5899bf215546Sopenharmony_ci stage_dirty |= CROCUS_STAGE_DIRTY_GS | CROCUS_STAGE_DIRTY_VS; 5900bf215546Sopenharmony_ci } 5901bf215546Sopenharmony_ci } 5902bf215546Sopenharmony_ci#endif 5903bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_CC_VIEWPORT) { 5904bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 5905bf215546Sopenharmony_ci uint32_t cc_vp_address; 5906bf215546Sopenharmony_ci 5907bf215546Sopenharmony_ci /* XXX: could avoid streaming for depth_clip [0,1] case. 
*/ 5908bf215546Sopenharmony_ci uint32_t *cc_vp_map = 5909bf215546Sopenharmony_ci stream_state(batch, 5910bf215546Sopenharmony_ci 4 * ice->state.num_viewports * 5911bf215546Sopenharmony_ci GENX(CC_VIEWPORT_length), 32, &cc_vp_address); 5912bf215546Sopenharmony_ci for (int i = 0; i < ice->state.num_viewports; i++) { 5913bf215546Sopenharmony_ci float zmin, zmax; 5914bf215546Sopenharmony_ci crocus_viewport_zmin_zmax(&ice->state.viewports[i], cso_rast->cso.clip_halfz, 5915bf215546Sopenharmony_ci ice->state.window_space_position, 5916bf215546Sopenharmony_ci &zmin, &zmax); 5917bf215546Sopenharmony_ci if (cso_rast->cso.depth_clip_near) 5918bf215546Sopenharmony_ci zmin = 0.0; 5919bf215546Sopenharmony_ci if (cso_rast->cso.depth_clip_far) 5920bf215546Sopenharmony_ci zmax = 1.0; 5921bf215546Sopenharmony_ci 5922bf215546Sopenharmony_ci crocus_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) { 5923bf215546Sopenharmony_ci ccv.MinimumDepth = zmin; 5924bf215546Sopenharmony_ci ccv.MaximumDepth = zmax; 5925bf215546Sopenharmony_ci } 5926bf215546Sopenharmony_ci 5927bf215546Sopenharmony_ci cc_vp_map += GENX(CC_VIEWPORT_length); 5928bf215546Sopenharmony_ci } 5929bf215546Sopenharmony_ci 5930bf215546Sopenharmony_ci#if GFX_VER >= 7 5931bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { 5932bf215546Sopenharmony_ci ptr.CCViewportPointer = cc_vp_address; 5933bf215546Sopenharmony_ci } 5934bf215546Sopenharmony_ci#elif GFX_VER == 6 5935bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { 5936bf215546Sopenharmony_ci vp.CCViewportStateChange = 1; 5937bf215546Sopenharmony_ci vp.PointertoCC_VIEWPORT = cc_vp_address; 5938bf215546Sopenharmony_ci } 5939bf215546Sopenharmony_ci#else 5940bf215546Sopenharmony_ci ice->state.cc_vp_address = cc_vp_address; 5941bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_COLOR_CALC_STATE; 5942bf215546Sopenharmony_ci#endif 5943bf215546Sopenharmony_ci } 5944bf215546Sopenharmony_ci 
5945bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_SF_CL_VIEWPORT) { 5946bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 5947bf215546Sopenharmony_ci#if GFX_VER >= 7 5948bf215546Sopenharmony_ci uint32_t sf_cl_vp_address; 5949bf215546Sopenharmony_ci uint32_t *vp_map = 5950bf215546Sopenharmony_ci stream_state(batch, 5951bf215546Sopenharmony_ci 4 * ice->state.num_viewports * 5952bf215546Sopenharmony_ci GENX(SF_CLIP_VIEWPORT_length), 64, &sf_cl_vp_address); 5953bf215546Sopenharmony_ci#else 5954bf215546Sopenharmony_ci uint32_t *vp_map = 5955bf215546Sopenharmony_ci stream_state(batch, 5956bf215546Sopenharmony_ci 4 * ice->state.num_viewports * GENX(SF_VIEWPORT_length), 5957bf215546Sopenharmony_ci 32, &ice->state.sf_vp_address); 5958bf215546Sopenharmony_ci uint32_t *clip_map = 5959bf215546Sopenharmony_ci stream_state(batch, 5960bf215546Sopenharmony_ci 4 * ice->state.num_viewports * GENX(CLIP_VIEWPORT_length), 5961bf215546Sopenharmony_ci 32, &ice->state.clip_vp_address); 5962bf215546Sopenharmony_ci#endif 5963bf215546Sopenharmony_ci 5964bf215546Sopenharmony_ci for (unsigned i = 0; i < ice->state.num_viewports; i++) { 5965bf215546Sopenharmony_ci const struct pipe_viewport_state *state = &ice->state.viewports[i]; 5966bf215546Sopenharmony_ci float gb_xmin, gb_xmax, gb_ymin, gb_ymax; 5967bf215546Sopenharmony_ci 5968bf215546Sopenharmony_ci#if GFX_VER == 8 5969bf215546Sopenharmony_ci float vp_xmin = viewport_extent(state, 0, -1.0f); 5970bf215546Sopenharmony_ci float vp_xmax = viewport_extent(state, 0, 1.0f); 5971bf215546Sopenharmony_ci float vp_ymin = viewport_extent(state, 1, -1.0f); 5972bf215546Sopenharmony_ci float vp_ymax = viewport_extent(state, 1, 1.0f); 5973bf215546Sopenharmony_ci#endif 5974bf215546Sopenharmony_ci intel_calculate_guardband_size(0, cso_fb->width, 0, cso_fb->height, 5975bf215546Sopenharmony_ci state->scale[0], state->scale[1], 5976bf215546Sopenharmony_ci state->translate[0], state->translate[1], 
5977bf215546Sopenharmony_ci &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); 5978bf215546Sopenharmony_ci#if GFX_VER >= 7 5979bf215546Sopenharmony_ci crocus_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) 5980bf215546Sopenharmony_ci#else 5981bf215546Sopenharmony_ci crocus_pack_state(GENX(SF_VIEWPORT), vp_map, vp) 5982bf215546Sopenharmony_ci#endif 5983bf215546Sopenharmony_ci { 5984bf215546Sopenharmony_ci vp.ViewportMatrixElementm00 = state->scale[0]; 5985bf215546Sopenharmony_ci vp.ViewportMatrixElementm11 = state->scale[1]; 5986bf215546Sopenharmony_ci vp.ViewportMatrixElementm22 = state->scale[2]; 5987bf215546Sopenharmony_ci vp.ViewportMatrixElementm30 = state->translate[0]; 5988bf215546Sopenharmony_ci vp.ViewportMatrixElementm31 = state->translate[1]; 5989bf215546Sopenharmony_ci vp.ViewportMatrixElementm32 = state->translate[2]; 5990bf215546Sopenharmony_ci#if GFX_VER < 6 5991bf215546Sopenharmony_ci struct pipe_scissor_state scissor; 5992bf215546Sopenharmony_ci crocus_fill_scissor_rect(ice, 0, &scissor); 5993bf215546Sopenharmony_ci vp.ScissorRectangle.ScissorRectangleXMin = scissor.minx; 5994bf215546Sopenharmony_ci vp.ScissorRectangle.ScissorRectangleXMax = scissor.maxx; 5995bf215546Sopenharmony_ci vp.ScissorRectangle.ScissorRectangleYMin = scissor.miny; 5996bf215546Sopenharmony_ci vp.ScissorRectangle.ScissorRectangleYMax = scissor.maxy; 5997bf215546Sopenharmony_ci#endif 5998bf215546Sopenharmony_ci 5999bf215546Sopenharmony_ci#if GFX_VER >= 7 6000bf215546Sopenharmony_ci vp.XMinClipGuardband = gb_xmin; 6001bf215546Sopenharmony_ci vp.XMaxClipGuardband = gb_xmax; 6002bf215546Sopenharmony_ci vp.YMinClipGuardband = gb_ymin; 6003bf215546Sopenharmony_ci vp.YMaxClipGuardband = gb_ymax; 6004bf215546Sopenharmony_ci#endif 6005bf215546Sopenharmony_ci#if GFX_VER == 8 6006bf215546Sopenharmony_ci vp.XMinViewPort = MAX2(vp_xmin, 0); 6007bf215546Sopenharmony_ci vp.XMaxViewPort = MIN2(vp_xmax, cso_fb->width) - 1; 6008bf215546Sopenharmony_ci vp.YMinViewPort = MAX2(vp_ymin, 0); 
6009bf215546Sopenharmony_ci vp.YMaxViewPort = MIN2(vp_ymax, cso_fb->height) - 1; 6010bf215546Sopenharmony_ci#endif 6011bf215546Sopenharmony_ci } 6012bf215546Sopenharmony_ci#if GFX_VER < 7 6013bf215546Sopenharmony_ci crocus_pack_state(GENX(CLIP_VIEWPORT), clip_map, clip) { 6014bf215546Sopenharmony_ci clip.XMinClipGuardband = gb_xmin; 6015bf215546Sopenharmony_ci clip.XMaxClipGuardband = gb_xmax; 6016bf215546Sopenharmony_ci clip.YMinClipGuardband = gb_ymin; 6017bf215546Sopenharmony_ci clip.YMaxClipGuardband = gb_ymax; 6018bf215546Sopenharmony_ci } 6019bf215546Sopenharmony_ci#endif 6020bf215546Sopenharmony_ci#if GFX_VER >= 7 6021bf215546Sopenharmony_ci vp_map += GENX(SF_CLIP_VIEWPORT_length); 6022bf215546Sopenharmony_ci#else 6023bf215546Sopenharmony_ci vp_map += GENX(SF_VIEWPORT_length); 6024bf215546Sopenharmony_ci clip_map += GENX(CLIP_VIEWPORT_length); 6025bf215546Sopenharmony_ci#endif 6026bf215546Sopenharmony_ci } 6027bf215546Sopenharmony_ci#if GFX_VER >= 7 6028bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { 6029bf215546Sopenharmony_ci ptr.SFClipViewportPointer = sf_cl_vp_address; 6030bf215546Sopenharmony_ci } 6031bf215546Sopenharmony_ci#elif GFX_VER == 6 6032bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { 6033bf215546Sopenharmony_ci vp.SFViewportStateChange = 1; 6034bf215546Sopenharmony_ci vp.CLIPViewportStateChange = 1; 6035bf215546Sopenharmony_ci vp.PointertoCLIP_VIEWPORT = ice->state.clip_vp_address; 6036bf215546Sopenharmony_ci vp.PointertoSF_VIEWPORT = ice->state.sf_vp_address; 6037bf215546Sopenharmony_ci } 6038bf215546Sopenharmony_ci#endif 6039bf215546Sopenharmony_ci } 6040bf215546Sopenharmony_ci 6041bf215546Sopenharmony_ci#if GFX_VER >= 6 6042bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN6_URB) { 6043bf215546Sopenharmony_ci#if GFX_VER == 6 6044bf215546Sopenharmony_ci bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL 
6045bf215546Sopenharmony_ci || ice->shaders.ff_gs_prog; 6046bf215546Sopenharmony_ci 6047bf215546Sopenharmony_ci struct brw_vue_prog_data *vue_prog_data = 6048bf215546Sopenharmony_ci (void *) ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data; 6049bf215546Sopenharmony_ci const unsigned vs_size = vue_prog_data->urb_entry_size; 6050bf215546Sopenharmony_ci unsigned gs_size = vs_size; 6051bf215546Sopenharmony_ci if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) { 6052bf215546Sopenharmony_ci struct brw_vue_prog_data *gs_vue_prog_data = 6053bf215546Sopenharmony_ci (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data; 6054bf215546Sopenharmony_ci gs_size = gs_vue_prog_data->urb_entry_size; 6055bf215546Sopenharmony_ci } 6056bf215546Sopenharmony_ci 6057bf215546Sopenharmony_ci genX(crocus_upload_urb)(batch, vs_size, gs_present, gs_size); 6058bf215546Sopenharmony_ci#endif 6059bf215546Sopenharmony_ci#if GFX_VER >= 7 6060bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &batch->screen->devinfo; 6061bf215546Sopenharmony_ci bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL; 6062bf215546Sopenharmony_ci bool tess_present = ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL; 6063bf215546Sopenharmony_ci unsigned entry_size[4]; 6064bf215546Sopenharmony_ci 6065bf215546Sopenharmony_ci for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 6066bf215546Sopenharmony_ci if (!ice->shaders.prog[i]) { 6067bf215546Sopenharmony_ci entry_size[i] = 1; 6068bf215546Sopenharmony_ci } else { 6069bf215546Sopenharmony_ci struct brw_vue_prog_data *vue_prog_data = 6070bf215546Sopenharmony_ci (void *) ice->shaders.prog[i]->prog_data; 6071bf215546Sopenharmony_ci entry_size[i] = vue_prog_data->urb_entry_size; 6072bf215546Sopenharmony_ci } 6073bf215546Sopenharmony_ci assert(entry_size[i] != 0); 6074bf215546Sopenharmony_ci } 6075bf215546Sopenharmony_ci 6076bf215546Sopenharmony_ci /* If we're just switching between programs with the same URB requirements, 
6077bf215546Sopenharmony_ci * skip the rest of the logic. 6078bf215546Sopenharmony_ci */ 6079bf215546Sopenharmony_ci bool no_change = false; 6080bf215546Sopenharmony_ci if (ice->urb.vsize == entry_size[MESA_SHADER_VERTEX] && 6081bf215546Sopenharmony_ci ice->urb.gs_present == gs_present && 6082bf215546Sopenharmony_ci ice->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] && 6083bf215546Sopenharmony_ci ice->urb.tess_present == tess_present && 6084bf215546Sopenharmony_ci ice->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] && 6085bf215546Sopenharmony_ci ice->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) { 6086bf215546Sopenharmony_ci no_change = true; 6087bf215546Sopenharmony_ci } 6088bf215546Sopenharmony_ci 6089bf215546Sopenharmony_ci if (!no_change) { 6090bf215546Sopenharmony_ci ice->urb.vsize = entry_size[MESA_SHADER_VERTEX]; 6091bf215546Sopenharmony_ci ice->urb.gs_present = gs_present; 6092bf215546Sopenharmony_ci ice->urb.gsize = entry_size[MESA_SHADER_GEOMETRY]; 6093bf215546Sopenharmony_ci ice->urb.tess_present = tess_present; 6094bf215546Sopenharmony_ci ice->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL]; 6095bf215546Sopenharmony_ci ice->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL]; 6096bf215546Sopenharmony_ci 6097bf215546Sopenharmony_ci unsigned entries[4]; 6098bf215546Sopenharmony_ci unsigned start[4]; 6099bf215546Sopenharmony_ci bool constrained; 6100bf215546Sopenharmony_ci intel_get_urb_config(devinfo, 6101bf215546Sopenharmony_ci batch->screen->l3_config_3d, 6102bf215546Sopenharmony_ci tess_present, 6103bf215546Sopenharmony_ci gs_present, 6104bf215546Sopenharmony_ci entry_size, 6105bf215546Sopenharmony_ci entries, start, NULL, &constrained); 6106bf215546Sopenharmony_ci 6107bf215546Sopenharmony_ci#if GFX_VER == 7 6108bf215546Sopenharmony_ci if (devinfo->platform == INTEL_PLATFORM_IVB) 6109bf215546Sopenharmony_ci gen7_emit_vs_workaround_flush(batch); 6110bf215546Sopenharmony_ci#endif 6111bf215546Sopenharmony_ci for (int i = MESA_SHADER_VERTEX; i <= 
MESA_SHADER_GEOMETRY; i++) { 6112bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) { 6113bf215546Sopenharmony_ci urb._3DCommandSubOpcode += i; 6114bf215546Sopenharmony_ci urb.VSURBStartingAddress = start[i]; 6115bf215546Sopenharmony_ci urb.VSURBEntryAllocationSize = entry_size[i] - 1; 6116bf215546Sopenharmony_ci urb.VSNumberofURBEntries = entries[i]; 6117bf215546Sopenharmony_ci } 6118bf215546Sopenharmony_ci } 6119bf215546Sopenharmony_ci } 6120bf215546Sopenharmony_ci#endif 6121bf215546Sopenharmony_ci } 6122bf215546Sopenharmony_ci 6123bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN6_BLEND_STATE) { 6124bf215546Sopenharmony_ci struct crocus_blend_state *cso_blend = ice->state.cso_blend; 6125bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 6126bf215546Sopenharmony_ci struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 6127bf215546Sopenharmony_ci 6128bf215546Sopenharmony_ci STATIC_ASSERT(GENX(BLEND_STATE_ENTRY_length) == 2); 6129bf215546Sopenharmony_ci int rt_dwords = 6130bf215546Sopenharmony_ci MAX2(cso_fb->nr_cbufs, 1) * GENX(BLEND_STATE_ENTRY_length); 6131bf215546Sopenharmony_ci#if GFX_VER >= 8 6132bf215546Sopenharmony_ci rt_dwords += GENX(BLEND_STATE_length); 6133bf215546Sopenharmony_ci#endif 6134bf215546Sopenharmony_ci uint32_t blend_offset; 6135bf215546Sopenharmony_ci uint32_t *blend_map = 6136bf215546Sopenharmony_ci stream_state(batch, 6137bf215546Sopenharmony_ci 4 * rt_dwords, 64, &blend_offset); 6138bf215546Sopenharmony_ci 6139bf215546Sopenharmony_ci#if GFX_VER >= 8 6140bf215546Sopenharmony_ci struct GENX(BLEND_STATE) be = { 0 }; 6141bf215546Sopenharmony_ci { 6142bf215546Sopenharmony_ci#else 6143bf215546Sopenharmony_ci for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) { 6144bf215546Sopenharmony_ci struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; 6145bf215546Sopenharmony_ci#define be entry 6146bf215546Sopenharmony_ci#endif 6147bf215546Sopenharmony_ci 6148bf215546Sopenharmony_ci 
be.AlphaTestEnable = cso_zsa->cso.alpha_enabled; 6149bf215546Sopenharmony_ci be.AlphaTestFunction = translate_compare_func(cso_zsa->cso.alpha_func); 6150bf215546Sopenharmony_ci be.AlphaToCoverageEnable = cso_blend->cso.alpha_to_coverage; 6151bf215546Sopenharmony_ci be.AlphaToOneEnable = cso_blend->cso.alpha_to_one; 6152bf215546Sopenharmony_ci be.AlphaToCoverageDitherEnable = GFX_VER >= 7 && cso_blend->cso.alpha_to_coverage; 6153bf215546Sopenharmony_ci be.ColorDitherEnable = cso_blend->cso.dither; 6154bf215546Sopenharmony_ci 6155bf215546Sopenharmony_ci#if GFX_VER >= 8 6156bf215546Sopenharmony_ci for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) { 6157bf215546Sopenharmony_ci struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; 6158bf215546Sopenharmony_ci#else 6159bf215546Sopenharmony_ci { 6160bf215546Sopenharmony_ci#endif 6161bf215546Sopenharmony_ci const struct pipe_rt_blend_state *rt = 6162bf215546Sopenharmony_ci &cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? i : 0]; 6163bf215546Sopenharmony_ci 6164bf215546Sopenharmony_ci be.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &entry, cso_blend, i) || 6165bf215546Sopenharmony_ci be.IndependentAlphaBlendEnable; 6166bf215546Sopenharmony_ci 6167bf215546Sopenharmony_ci if (GFX_VER >= 8 || can_emit_logic_op(ice)) { 6168bf215546Sopenharmony_ci entry.LogicOpEnable = cso_blend->cso.logicop_enable; 6169bf215546Sopenharmony_ci entry.LogicOpFunction = cso_blend->cso.logicop_func; 6170bf215546Sopenharmony_ci } 6171bf215546Sopenharmony_ci 6172bf215546Sopenharmony_ci entry.ColorClampRange = COLORCLAMP_RTFORMAT; 6173bf215546Sopenharmony_ci entry.PreBlendColorClampEnable = true; 6174bf215546Sopenharmony_ci entry.PostBlendColorClampEnable = true; 6175bf215546Sopenharmony_ci 6176bf215546Sopenharmony_ci entry.WriteDisableRed = !(rt->colormask & PIPE_MASK_R); 6177bf215546Sopenharmony_ci entry.WriteDisableGreen = !(rt->colormask & PIPE_MASK_G); 6178bf215546Sopenharmony_ci entry.WriteDisableBlue = !(rt->colormask & 
PIPE_MASK_B); 6179bf215546Sopenharmony_ci entry.WriteDisableAlpha = !(rt->colormask & PIPE_MASK_A); 6180bf215546Sopenharmony_ci 6181bf215546Sopenharmony_ci#if GFX_VER >= 8 6182bf215546Sopenharmony_ci GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry); 6183bf215546Sopenharmony_ci#else 6184bf215546Sopenharmony_ci GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry); 6185bf215546Sopenharmony_ci#endif 6186bf215546Sopenharmony_ci } 6187bf215546Sopenharmony_ci } 6188bf215546Sopenharmony_ci#if GFX_VER >= 8 6189bf215546Sopenharmony_ci GENX(BLEND_STATE_pack)(NULL, blend_map, &be); 6190bf215546Sopenharmony_ci#endif 6191bf215546Sopenharmony_ci#if GFX_VER < 7 6192bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 6193bf215546Sopenharmony_ci ptr.PointertoBLEND_STATE = blend_offset; 6194bf215546Sopenharmony_ci ptr.BLEND_STATEChange = true; 6195bf215546Sopenharmony_ci } 6196bf215546Sopenharmony_ci#else 6197bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) { 6198bf215546Sopenharmony_ci ptr.BlendStatePointer = blend_offset; 6199bf215546Sopenharmony_ci#if GFX_VER >= 8 6200bf215546Sopenharmony_ci ptr.BlendStatePointerValid = true; 6201bf215546Sopenharmony_ci#endif 6202bf215546Sopenharmony_ci } 6203bf215546Sopenharmony_ci#endif 6204bf215546Sopenharmony_ci } 6205bf215546Sopenharmony_ci#endif 6206bf215546Sopenharmony_ci 6207bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_COLOR_CALC_STATE) { 6208bf215546Sopenharmony_ci struct crocus_depth_stencil_alpha_state *cso = ice->state.cso_zsa; 6209bf215546Sopenharmony_ci UNUSED struct crocus_blend_state *cso_blend = ice->state.cso_blend; 6210bf215546Sopenharmony_ci struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; 6211bf215546Sopenharmony_ci uint32_t cc_offset; 6212bf215546Sopenharmony_ci void *cc_map = 6213bf215546Sopenharmony_ci stream_state(batch, 6214bf215546Sopenharmony_ci sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), 
6215bf215546Sopenharmony_ci 64, &cc_offset); 6216bf215546Sopenharmony_ci#if GFX_VER <= 5 6217bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS; 6218bf215546Sopenharmony_ci#endif 6219bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(COLOR_CALC_STATE), cc_map, cc) { 6220bf215546Sopenharmony_ci cc.AlphaTestFormat = ALPHATEST_FLOAT32; 6221bf215546Sopenharmony_ci cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value; 6222bf215546Sopenharmony_ci 6223bf215546Sopenharmony_ci#if GFX_VER <= 5 6224bf215546Sopenharmony_ci 6225bf215546Sopenharmony_ci set_depth_stencil_bits(ice, &cc); 6226bf215546Sopenharmony_ci 6227bf215546Sopenharmony_ci if (cso_blend->cso.logicop_enable) { 6228bf215546Sopenharmony_ci if (can_emit_logic_op(ice)) { 6229bf215546Sopenharmony_ci cc.LogicOpEnable = cso_blend->cso.logicop_enable; 6230bf215546Sopenharmony_ci cc.LogicOpFunction = cso_blend->cso.logicop_func; 6231bf215546Sopenharmony_ci } 6232bf215546Sopenharmony_ci } 6233bf215546Sopenharmony_ci cc.ColorDitherEnable = cso_blend->cso.dither; 6234bf215546Sopenharmony_ci 6235bf215546Sopenharmony_ci cc.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &cc, cso_blend, 0); 6236bf215546Sopenharmony_ci 6237bf215546Sopenharmony_ci if (cso->cso.alpha_enabled && ice->state.framebuffer.nr_cbufs <= 1) { 6238bf215546Sopenharmony_ci cc.AlphaTestEnable = cso->cso.alpha_enabled; 6239bf215546Sopenharmony_ci cc.AlphaTestFunction = translate_compare_func(cso->cso.alpha_func); 6240bf215546Sopenharmony_ci } 6241bf215546Sopenharmony_ci cc.StatisticsEnable = ice->state.stats_wm ? 
1 : 0; 6242bf215546Sopenharmony_ci cc.CCViewportStatePointer = ro_bo(batch->state.bo, ice->state.cc_vp_address); 6243bf215546Sopenharmony_ci#else 6244bf215546Sopenharmony_ci cc.AlphaTestFormat = ALPHATEST_FLOAT32; 6245bf215546Sopenharmony_ci cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value; 6246bf215546Sopenharmony_ci 6247bf215546Sopenharmony_ci cc.BlendConstantColorRed = ice->state.blend_color.color[0]; 6248bf215546Sopenharmony_ci cc.BlendConstantColorGreen = ice->state.blend_color.color[1]; 6249bf215546Sopenharmony_ci cc.BlendConstantColorBlue = ice->state.blend_color.color[2]; 6250bf215546Sopenharmony_ci cc.BlendConstantColorAlpha = ice->state.blend_color.color[3]; 6251bf215546Sopenharmony_ci#endif 6252bf215546Sopenharmony_ci cc.StencilReferenceValue = p_stencil_refs->ref_value[0]; 6253bf215546Sopenharmony_ci cc.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; 6254bf215546Sopenharmony_ci } 6255bf215546Sopenharmony_ci ice->shaders.cc_offset = cc_offset; 6256bf215546Sopenharmony_ci#if GFX_VER >= 6 6257bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 6258bf215546Sopenharmony_ci ptr.ColorCalcStatePointer = cc_offset; 6259bf215546Sopenharmony_ci#if GFX_VER != 7 6260bf215546Sopenharmony_ci ptr.ColorCalcStatePointerValid = true; 6261bf215546Sopenharmony_ci#endif 6262bf215546Sopenharmony_ci } 6263bf215546Sopenharmony_ci#endif 6264bf215546Sopenharmony_ci } 6265bf215546Sopenharmony_ci#if GFX_VER <= 5 6266bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN4_CONSTANT_COLOR) { 6267bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) { 6268bf215546Sopenharmony_ci blend_cc.BlendConstantColorRed = ice->state.blend_color.color[0]; 6269bf215546Sopenharmony_ci blend_cc.BlendConstantColorGreen = ice->state.blend_color.color[1]; 6270bf215546Sopenharmony_ci blend_cc.BlendConstantColorBlue = ice->state.blend_color.color[2]; 6271bf215546Sopenharmony_ci blend_cc.BlendConstantColorAlpha = 
ice->state.blend_color.color[3]; 6272bf215546Sopenharmony_ci } 6273bf215546Sopenharmony_ci } 6274bf215546Sopenharmony_ci#endif 6275bf215546Sopenharmony_ci for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 6276bf215546Sopenharmony_ci if (!(stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage))) 6277bf215546Sopenharmony_ci continue; 6278bf215546Sopenharmony_ci 6279bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 6280bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[stage]; 6281bf215546Sopenharmony_ci 6282bf215546Sopenharmony_ci if (!shader) 6283bf215546Sopenharmony_ci continue; 6284bf215546Sopenharmony_ci 6285bf215546Sopenharmony_ci if (shs->sysvals_need_upload) 6286bf215546Sopenharmony_ci upload_sysvals(ice, stage); 6287bf215546Sopenharmony_ci 6288bf215546Sopenharmony_ci#if GFX_VER <= 5 6289bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN4_CURBE; 6290bf215546Sopenharmony_ci#endif 6291bf215546Sopenharmony_ci#if GFX_VER >= 7 6292bf215546Sopenharmony_ci struct push_bos push_bos = {}; 6293bf215546Sopenharmony_ci setup_constant_buffers(ice, batch, stage, &push_bos); 6294bf215546Sopenharmony_ci 6295bf215546Sopenharmony_ci emit_push_constant_packets(ice, batch, stage, &push_bos); 6296bf215546Sopenharmony_ci#endif 6297bf215546Sopenharmony_ci } 6298bf215546Sopenharmony_ci 6299bf215546Sopenharmony_ci for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 6300bf215546Sopenharmony_ci if (stage_dirty & (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage)) { 6301bf215546Sopenharmony_ci if (ice->shaders.prog[stage]) { 6302bf215546Sopenharmony_ci#if GFX_VER <= 6 6303bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS; 6304bf215546Sopenharmony_ci#endif 6305bf215546Sopenharmony_ci crocus_populate_binding_table(ice, batch, stage, false); 6306bf215546Sopenharmony_ci ice->shaders.prog[stage]->bind_bo_offset = 6307bf215546Sopenharmony_ci crocus_upload_binding_table(ice, batch, 
6308bf215546Sopenharmony_ci ice->shaders.prog[stage]->surf_offset, 6309bf215546Sopenharmony_ci ice->shaders.prog[stage]->bt.size_bytes); 6310bf215546Sopenharmony_ci 6311bf215546Sopenharmony_ci#if GFX_VER >= 7 6312bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { 6313bf215546Sopenharmony_ci ptr._3DCommandSubOpcode = 38 + stage; 6314bf215546Sopenharmony_ci ptr.PointertoVSBindingTable = ice->shaders.prog[stage]->bind_bo_offset; 6315bf215546Sopenharmony_ci } 6316bf215546Sopenharmony_ci#endif 6317bf215546Sopenharmony_ci#if GFX_VER == 6 6318bf215546Sopenharmony_ci } else if (stage == MESA_SHADER_GEOMETRY && ice->shaders.ff_gs_prog) { 6319bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS; 6320bf215546Sopenharmony_ci crocus_populate_binding_table(ice, batch, stage, true); 6321bf215546Sopenharmony_ci ice->shaders.ff_gs_prog->bind_bo_offset = 6322bf215546Sopenharmony_ci crocus_upload_binding_table(ice, batch, 6323bf215546Sopenharmony_ci ice->shaders.ff_gs_prog->surf_offset, 6324bf215546Sopenharmony_ci ice->shaders.ff_gs_prog->bt.size_bytes); 6325bf215546Sopenharmony_ci#endif 6326bf215546Sopenharmony_ci } 6327bf215546Sopenharmony_ci } 6328bf215546Sopenharmony_ci } 6329bf215546Sopenharmony_ci#if GFX_VER <= 6 6330bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS) { 6331bf215546Sopenharmony_ci struct crocus_compiled_shader *gs = ice->shaders.prog[MESA_SHADER_GEOMETRY]; 6332bf215546Sopenharmony_ci if (gs == NULL) 6333bf215546Sopenharmony_ci gs = ice->shaders.ff_gs_prog; 6334bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), ptr) { 6335bf215546Sopenharmony_ci ptr.PointertoVSBindingTable = ice->shaders.prog[MESA_SHADER_VERTEX]->bind_bo_offset; 6336bf215546Sopenharmony_ci ptr.PointertoPSBindingTable = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bind_bo_offset; 6337bf215546Sopenharmony_ci#if GFX_VER == 6 6338bf215546Sopenharmony_ci ptr.VSBindingTableChange = 
true; 6339bf215546Sopenharmony_ci ptr.PSBindingTableChange = true; 6340bf215546Sopenharmony_ci ptr.GSBindingTableChange = gs ? true : false; 6341bf215546Sopenharmony_ci ptr.PointertoGSBindingTable = gs ? gs->bind_bo_offset : 0; 6342bf215546Sopenharmony_ci#endif 6343bf215546Sopenharmony_ci } 6344bf215546Sopenharmony_ci } 6345bf215546Sopenharmony_ci#endif 6346bf215546Sopenharmony_ci 6347bf215546Sopenharmony_ci bool sampler_updates = dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS; 6348bf215546Sopenharmony_ci for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { 6349bf215546Sopenharmony_ci if (!(stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage)) || 6350bf215546Sopenharmony_ci !ice->shaders.prog[stage]) 6351bf215546Sopenharmony_ci continue; 6352bf215546Sopenharmony_ci 6353bf215546Sopenharmony_ci crocus_upload_sampler_states(ice, batch, stage); 6354bf215546Sopenharmony_ci 6355bf215546Sopenharmony_ci sampler_updates = true; 6356bf215546Sopenharmony_ci 6357bf215546Sopenharmony_ci#if GFX_VER >= 7 6358bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 6359bf215546Sopenharmony_ci 6360bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { 6361bf215546Sopenharmony_ci ptr._3DCommandSubOpcode = 43 + stage; 6362bf215546Sopenharmony_ci ptr.PointertoVSSamplerState = shs->sampler_offset; 6363bf215546Sopenharmony_ci } 6364bf215546Sopenharmony_ci#endif 6365bf215546Sopenharmony_ci } 6366bf215546Sopenharmony_ci 6367bf215546Sopenharmony_ci if (sampler_updates) { 6368bf215546Sopenharmony_ci#if GFX_VER == 6 6369bf215546Sopenharmony_ci struct crocus_shader_state *shs_vs = &ice->state.shaders[MESA_SHADER_VERTEX]; 6370bf215546Sopenharmony_ci struct crocus_shader_state *shs_gs = &ice->state.shaders[MESA_SHADER_GEOMETRY]; 6371bf215546Sopenharmony_ci struct crocus_shader_state *shs_fs = &ice->state.shaders[MESA_SHADER_FRAGMENT]; 6372bf215546Sopenharmony_ci crocus_emit_cmd(batch, 
GENX(3DSTATE_SAMPLER_STATE_POINTERS), ptr) { 6373bf215546Sopenharmony_ci if (ice->shaders.prog[MESA_SHADER_VERTEX] && 6374bf215546Sopenharmony_ci (dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS || 6375bf215546Sopenharmony_ci stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_VERTEX))) { 6376bf215546Sopenharmony_ci ptr.VSSamplerStateChange = true; 6377bf215546Sopenharmony_ci ptr.PointertoVSSamplerState = shs_vs->sampler_offset; 6378bf215546Sopenharmony_ci } 6379bf215546Sopenharmony_ci if (ice->shaders.prog[MESA_SHADER_GEOMETRY] && 6380bf215546Sopenharmony_ci (dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS || 6381bf215546Sopenharmony_ci stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_GEOMETRY))) { 6382bf215546Sopenharmony_ci ptr.GSSamplerStateChange = true; 6383bf215546Sopenharmony_ci ptr.PointertoGSSamplerState = shs_gs->sampler_offset; 6384bf215546Sopenharmony_ci } 6385bf215546Sopenharmony_ci if (ice->shaders.prog[MESA_SHADER_FRAGMENT] && 6386bf215546Sopenharmony_ci (dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS || 6387bf215546Sopenharmony_ci stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_FRAGMENT))) { 6388bf215546Sopenharmony_ci ptr.PSSamplerStateChange = true; 6389bf215546Sopenharmony_ci ptr.PointertoPSSamplerState = shs_fs->sampler_offset; 6390bf215546Sopenharmony_ci } 6391bf215546Sopenharmony_ci } 6392bf215546Sopenharmony_ci#endif 6393bf215546Sopenharmony_ci } 6394bf215546Sopenharmony_ci 6395bf215546Sopenharmony_ci#if GFX_VER >= 6 6396bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN6_MULTISAMPLE) { 6397bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) { 6398bf215546Sopenharmony_ci ms.PixelLocation = 6399bf215546Sopenharmony_ci ice->state.cso_rast->cso.half_pixel_center ? 
CENTER : UL_CORNER; 6400bf215546Sopenharmony_ci if (ice->state.framebuffer.samples > 0) 6401bf215546Sopenharmony_ci ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1; 6402bf215546Sopenharmony_ci#if GFX_VER == 6 6403bf215546Sopenharmony_ci INTEL_SAMPLE_POS_4X(ms.Sample); 6404bf215546Sopenharmony_ci#elif GFX_VER == 7 6405bf215546Sopenharmony_ci switch (ice->state.framebuffer.samples) { 6406bf215546Sopenharmony_ci case 1: 6407bf215546Sopenharmony_ci INTEL_SAMPLE_POS_1X(ms.Sample); 6408bf215546Sopenharmony_ci break; 6409bf215546Sopenharmony_ci case 2: 6410bf215546Sopenharmony_ci INTEL_SAMPLE_POS_2X(ms.Sample); 6411bf215546Sopenharmony_ci break; 6412bf215546Sopenharmony_ci case 4: 6413bf215546Sopenharmony_ci INTEL_SAMPLE_POS_4X(ms.Sample); 6414bf215546Sopenharmony_ci break; 6415bf215546Sopenharmony_ci case 8: 6416bf215546Sopenharmony_ci INTEL_SAMPLE_POS_8X(ms.Sample); 6417bf215546Sopenharmony_ci break; 6418bf215546Sopenharmony_ci default: 6419bf215546Sopenharmony_ci break; 6420bf215546Sopenharmony_ci } 6421bf215546Sopenharmony_ci#endif 6422bf215546Sopenharmony_ci } 6423bf215546Sopenharmony_ci } 6424bf215546Sopenharmony_ci 6425bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN6_SAMPLE_MASK) { 6426bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) { 6427bf215546Sopenharmony_ci ms.SampleMask = determine_sample_mask(ice); 6428bf215546Sopenharmony_ci } 6429bf215546Sopenharmony_ci } 6430bf215546Sopenharmony_ci#endif 6431bf215546Sopenharmony_ci 6432bf215546Sopenharmony_ci#if GFX_VER >= 7 6433bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT]; 6434bf215546Sopenharmony_ci if ((stage_dirty & CROCUS_STAGE_DIRTY_FS) && shader) { 6435bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data = shader->prog_data; 6436bf215546Sopenharmony_ci struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; 6437bf215546Sopenharmony_ci 6438bf215546Sopenharmony_ci crocus_emit_cmd(batch, 
GENX(3DSTATE_PS), ps) { 6439bf215546Sopenharmony_ci 6440bf215546Sopenharmony_ci /* Initialize the execution mask with VMask. Otherwise, derivatives are 6441bf215546Sopenharmony_ci * incorrect for subspans where some of the pixels are unlit. We believe 6442bf215546Sopenharmony_ci * the bit just didn't take effect in previous generations. 6443bf215546Sopenharmony_ci */ 6444bf215546Sopenharmony_ci ps.VectorMaskEnable = GFX_VER >= 8 && wm_prog_data->uses_vmask; 6445bf215546Sopenharmony_ci 6446bf215546Sopenharmony_ci ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; 6447bf215546Sopenharmony_ci ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; 6448bf215546Sopenharmony_ci ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; 6449bf215546Sopenharmony_ci 6450bf215546Sopenharmony_ci ps.DispatchGRFStartRegisterForConstantSetupData0 = 6451bf215546Sopenharmony_ci brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); 6452bf215546Sopenharmony_ci ps.DispatchGRFStartRegisterForConstantSetupData1 = 6453bf215546Sopenharmony_ci brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); 6454bf215546Sopenharmony_ci ps.DispatchGRFStartRegisterForConstantSetupData2 = 6455bf215546Sopenharmony_ci brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); 6456bf215546Sopenharmony_ci 6457bf215546Sopenharmony_ci ps.KernelStartPointer0 = KSP(ice, shader) + 6458bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); 6459bf215546Sopenharmony_ci ps.KernelStartPointer1 = KSP(ice, shader) + 6460bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); 6461bf215546Sopenharmony_ci ps.KernelStartPointer2 = KSP(ice, shader) + 6462bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); 6463bf215546Sopenharmony_ci 6464bf215546Sopenharmony_ci#if GFX_VERx10 == 75 6465bf215546Sopenharmony_ci ps.SampleMask = determine_sample_mask(ice); 6466bf215546Sopenharmony_ci#endif 6467bf215546Sopenharmony_ci // XXX: WABTPPrefetchDisable, see 
above, drop at C0 6468bf215546Sopenharmony_ci ps.BindingTableEntryCount = shader->bt.size_bytes / 4; 6469bf215546Sopenharmony_ci ps.FloatingPointMode = prog_data->use_alt_mode; 6470bf215546Sopenharmony_ci#if GFX_VER >= 8 6471bf215546Sopenharmony_ci ps.MaximumNumberofThreadsPerPSD = 6472bf215546Sopenharmony_ci batch->screen->devinfo.max_threads_per_psd - 2; 6473bf215546Sopenharmony_ci#else 6474bf215546Sopenharmony_ci ps.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1; 6475bf215546Sopenharmony_ci#endif 6476bf215546Sopenharmony_ci 6477bf215546Sopenharmony_ci ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0; 6478bf215546Sopenharmony_ci 6479bf215546Sopenharmony_ci#if GFX_VER < 8 6480bf215546Sopenharmony_ci ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 6481bf215546Sopenharmony_ci ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend && ice->state.cso_blend->dual_color_blending; 6482bf215546Sopenharmony_ci ps.AttributeEnable = (wm_prog_data->num_varying_inputs != 0); 6483bf215546Sopenharmony_ci#endif 6484bf215546Sopenharmony_ci /* From the documentation for this packet: 6485bf215546Sopenharmony_ci * "If the PS kernel does not need the Position XY Offsets to 6486bf215546Sopenharmony_ci * compute a Position Value, then this field should be programmed 6487bf215546Sopenharmony_ci * to POSOFFSET_NONE." 6488bf215546Sopenharmony_ci * 6489bf215546Sopenharmony_ci * "SW Recommendation: If the PS kernel needs the Position Offsets 6490bf215546Sopenharmony_ci * to compute a Position XY value, this field should match Position 6491bf215546Sopenharmony_ci * ZW Interpolation Mode to ensure a consistent position.xyzw 6492bf215546Sopenharmony_ci * computation." 6493bf215546Sopenharmony_ci * 6494bf215546Sopenharmony_ci * We only require XY sample offsets. So, this recommendation doesn't 6495bf215546Sopenharmony_ci * look useful at the moment. We might need this in future. 
6496bf215546Sopenharmony_ci */ 6497bf215546Sopenharmony_ci ps.PositionXYOffsetSelect = 6498bf215546Sopenharmony_ci wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; 6499bf215546Sopenharmony_ci 6500bf215546Sopenharmony_ci if (wm_prog_data->base.total_scratch) { 6501bf215546Sopenharmony_ci struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, MESA_SHADER_FRAGMENT); 6502bf215546Sopenharmony_ci ps.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11; 6503bf215546Sopenharmony_ci ps.ScratchSpaceBasePointer = rw_bo(bo, 0); 6504bf215546Sopenharmony_ci } 6505bf215546Sopenharmony_ci } 6506bf215546Sopenharmony_ci#if GFX_VER == 8 6507bf215546Sopenharmony_ci const struct shader_info *fs_info = 6508bf215546Sopenharmony_ci crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT); 6509bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) { 6510bf215546Sopenharmony_ci psx.PixelShaderValid = true; 6511bf215546Sopenharmony_ci psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 6512bf215546Sopenharmony_ci psx.PixelShaderKillsPixel = wm_prog_data->uses_kill; 6513bf215546Sopenharmony_ci psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0; 6514bf215546Sopenharmony_ci psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 6515bf215546Sopenharmony_ci psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 6516bf215546Sopenharmony_ci psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; 6517bf215546Sopenharmony_ci 6518bf215546Sopenharmony_ci /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */ 6519bf215546Sopenharmony_ci if (wm_prog_data->uses_sample_mask) 6520bf215546Sopenharmony_ci psx.PixelShaderUsesInputCoverageMask = true; 6521bf215546Sopenharmony_ci 6522bf215546Sopenharmony_ci psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 6523bf215546Sopenharmony_ci 6524bf215546Sopenharmony_ci /* The stricter cross-primitive coherency guarantees that the hardware 
6525bf215546Sopenharmony_ci * gives us with the "Accesses UAV" bit set for at least one shader stage 6526bf215546Sopenharmony_ci * and the "UAV coherency required" bit set on the 3DPRIMITIVE command 6527bf215546Sopenharmony_ci * are redundant within the current image, atomic counter and SSBO GL 6528bf215546Sopenharmony_ci * APIs, which all have very loose ordering and coherency requirements 6529bf215546Sopenharmony_ci * and generally rely on the application to insert explicit barriers when 6530bf215546Sopenharmony_ci * a shader invocation is expected to see the memory writes performed by 6531bf215546Sopenharmony_ci * the invocations of some previous primitive. Regardless of the value 6532bf215546Sopenharmony_ci * of "UAV coherency required", the "Accesses UAV" bits will implicitly 6533bf215546Sopenharmony_ci * cause an in most cases useless DC flush when the lowermost stage with 6534bf215546Sopenharmony_ci * the bit set finishes execution. 6535bf215546Sopenharmony_ci * 6536bf215546Sopenharmony_ci * It would be nice to disable it, but in some cases we can't because on 6537bf215546Sopenharmony_ci * Gfx8+ it also has an influence on rasterization via the PS UAV-only 6538bf215546Sopenharmony_ci * signal (which could be set independently from the coherency mechanism 6539bf215546Sopenharmony_ci * in the 3DSTATE_WM command on Gfx7), and because in some cases it will 6540bf215546Sopenharmony_ci * determine whether the hardware skips execution of the fragment shader 6541bf215546Sopenharmony_ci * or not via the ThreadDispatchEnable signal. However if we know that 6542bf215546Sopenharmony_ci * GFX8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and 6543bf215546Sopenharmony_ci * GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any 6544bf215546Sopenharmony_ci * difference so we may just disable it here. 
6545bf215546Sopenharmony_ci * 6546bf215546Sopenharmony_ci * Gfx8 hardware tries to compute ThreadDispatchEnable for us but doesn't 6547bf215546Sopenharmony_ci * take into account KillPixels when no depth or stencil writes are 6548bf215546Sopenharmony_ci * enabled. In order for occlusion queries to work correctly with no 6549bf215546Sopenharmony_ci * attachments, we need to force-enable here. 6550bf215546Sopenharmony_ci * 6551bf215546Sopenharmony_ci */ 6552bf215546Sopenharmony_ci if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) && 6553bf215546Sopenharmony_ci !(has_writeable_rt(ice->state.cso_blend, fs_info))) 6554bf215546Sopenharmony_ci psx.PixelShaderHasUAV = true; 6555bf215546Sopenharmony_ci } 6556bf215546Sopenharmony_ci#endif 6557bf215546Sopenharmony_ci } 6558bf215546Sopenharmony_ci#endif 6559bf215546Sopenharmony_ci 6560bf215546Sopenharmony_ci#if GFX_VER >= 7 6561bf215546Sopenharmony_ci if (ice->state.streamout_active) { 6562bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN7_SO_BUFFERS) { 6563bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 6564bf215546Sopenharmony_ci struct crocus_stream_output_target *tgt = 6565bf215546Sopenharmony_ci (void *) ice->state.so_target[i]; 6566bf215546Sopenharmony_ci 6567bf215546Sopenharmony_ci if (!tgt) { 6568bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) { 6569bf215546Sopenharmony_ci sob.SOBufferIndex = i; 6570bf215546Sopenharmony_ci sob.MOCS = crocus_mocs(NULL, &batch->screen->isl_dev); 6571bf215546Sopenharmony_ci } 6572bf215546Sopenharmony_ci continue; 6573bf215546Sopenharmony_ci } 6574bf215546Sopenharmony_ci struct crocus_resource *res = (void *) tgt->base.buffer; 6575bf215546Sopenharmony_ci uint32_t start = tgt->base.buffer_offset; 6576bf215546Sopenharmony_ci#if GFX_VER < 8 6577bf215546Sopenharmony_ci uint32_t end = ALIGN(start + tgt->base.buffer_size, 4); 6578bf215546Sopenharmony_ci#endif 6579bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) { 
6580bf215546Sopenharmony_ci sob.SOBufferIndex = i; 6581bf215546Sopenharmony_ci 6582bf215546Sopenharmony_ci sob.SurfaceBaseAddress = rw_bo(res->bo, start); 6583bf215546Sopenharmony_ci sob.MOCS = crocus_mocs(res->bo, &batch->screen->isl_dev); 6584bf215546Sopenharmony_ci#if GFX_VER < 8 6585bf215546Sopenharmony_ci sob.SurfacePitch = tgt->stride; 6586bf215546Sopenharmony_ci sob.SurfaceEndAddress = rw_bo(res->bo, end); 6587bf215546Sopenharmony_ci#else 6588bf215546Sopenharmony_ci sob.SOBufferEnable = true; 6589bf215546Sopenharmony_ci sob.StreamOffsetWriteEnable = true; 6590bf215546Sopenharmony_ci sob.StreamOutputBufferOffsetAddressEnable = true; 6591bf215546Sopenharmony_ci 6592bf215546Sopenharmony_ci sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1; 6593bf215546Sopenharmony_ci sob.StreamOutputBufferOffsetAddress = 6594bf215546Sopenharmony_ci rw_bo(crocus_resource_bo(&tgt->offset_res->base.b), tgt->offset_offset); 6595bf215546Sopenharmony_ci if (tgt->zero_offset) { 6596bf215546Sopenharmony_ci sob.StreamOffset = 0; 6597bf215546Sopenharmony_ci tgt->zero_offset = false; 6598bf215546Sopenharmony_ci } else 6599bf215546Sopenharmony_ci sob.StreamOffset = 0xFFFFFFFF; /* not offset, see above */ 6600bf215546Sopenharmony_ci#endif 6601bf215546Sopenharmony_ci } 6602bf215546Sopenharmony_ci } 6603bf215546Sopenharmony_ci } 6604bf215546Sopenharmony_ci 6605bf215546Sopenharmony_ci if ((dirty & CROCUS_DIRTY_SO_DECL_LIST) && ice->state.streamout) { 6606bf215546Sopenharmony_ci uint32_t *decl_list = 6607bf215546Sopenharmony_ci ice->state.streamout + GENX(3DSTATE_STREAMOUT_length); 6608bf215546Sopenharmony_ci crocus_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2)); 6609bf215546Sopenharmony_ci } 6610bf215546Sopenharmony_ci 6611bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_STREAMOUT) { 6612bf215546Sopenharmony_ci const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 6613bf215546Sopenharmony_ci 6614bf215546Sopenharmony_ci uint32_t 
dynamic_sol[GENX(3DSTATE_STREAMOUT_length)]; 6615bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) { 6616bf215546Sopenharmony_ci sol.SOFunctionEnable = true; 6617bf215546Sopenharmony_ci sol.SOStatisticsEnable = true; 6618bf215546Sopenharmony_ci 6619bf215546Sopenharmony_ci sol.RenderingDisable = cso_rast->cso.rasterizer_discard && 6620bf215546Sopenharmony_ci !ice->state.prims_generated_query_active; 6621bf215546Sopenharmony_ci sol.ReorderMode = cso_rast->cso.flatshade_first ? LEADING : TRAILING; 6622bf215546Sopenharmony_ci } 6623bf215546Sopenharmony_ci 6624bf215546Sopenharmony_ci assert(ice->state.streamout); 6625bf215546Sopenharmony_ci 6626bf215546Sopenharmony_ci crocus_emit_merge(batch, ice->state.streamout, dynamic_sol, 6627bf215546Sopenharmony_ci GENX(3DSTATE_STREAMOUT_length)); 6628bf215546Sopenharmony_ci } 6629bf215546Sopenharmony_ci } else { 6630bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_STREAMOUT) { 6631bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol); 6632bf215546Sopenharmony_ci } 6633bf215546Sopenharmony_ci } 6634bf215546Sopenharmony_ci#endif 6635bf215546Sopenharmony_ci#if GFX_VER == 6 6636bf215546Sopenharmony_ci if (ice->state.streamout_active) { 6637bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN6_SVBI) { 6638bf215546Sopenharmony_ci crocus_emit_so_svbi(ice); 6639bf215546Sopenharmony_ci } 6640bf215546Sopenharmony_ci } 6641bf215546Sopenharmony_ci#endif 6642bf215546Sopenharmony_ci 6643bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_CLIP) { 6644bf215546Sopenharmony_ci#if GFX_VER < 6 6645bf215546Sopenharmony_ci const struct brw_clip_prog_data *clip_prog_data = (struct brw_clip_prog_data *)ice->shaders.clip_prog->prog_data; 6646bf215546Sopenharmony_ci struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso; 6647bf215546Sopenharmony_ci 6648bf215546Sopenharmony_ci uint32_t *clip_ptr = stream_state(batch, GENX(CLIP_STATE_length) * 4, 32, &ice->shaders.clip_offset); 
6649bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS; 6650bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(CLIP_STATE), clip_ptr, clip) { 6651bf215546Sopenharmony_ci clip.KernelStartPointer = KSP(ice, ice->shaders.clip_prog); 6652bf215546Sopenharmony_ci clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 6653bf215546Sopenharmony_ci clip.SingleProgramFlow = true; 6654bf215546Sopenharmony_ci clip.GRFRegisterCount = DIV_ROUND_UP(clip_prog_data->total_grf, 16) - 1; 6655bf215546Sopenharmony_ci 6656bf215546Sopenharmony_ci clip.VertexURBEntryReadLength = clip_prog_data->urb_read_length; 6657bf215546Sopenharmony_ci clip.ConstantURBEntryReadLength = clip_prog_data->curb_read_length; 6658bf215546Sopenharmony_ci 6659bf215546Sopenharmony_ci clip.DispatchGRFStartRegisterForURBData = 1; 6660bf215546Sopenharmony_ci clip.VertexURBEntryReadOffset = 0; 6661bf215546Sopenharmony_ci clip.ConstantURBEntryReadOffset = ice->curbe.clip_start * 2; 6662bf215546Sopenharmony_ci 6663bf215546Sopenharmony_ci clip.NumberofURBEntries = batch->ice->urb.nr_clip_entries; 6664bf215546Sopenharmony_ci clip.URBEntryAllocationSize = batch->ice->urb.vsize - 1; 6665bf215546Sopenharmony_ci 6666bf215546Sopenharmony_ci if (batch->ice->urb.nr_clip_entries >= 10) { 6667bf215546Sopenharmony_ci /* Half of the URB entries go to each thread, and it has to be an 6668bf215546Sopenharmony_ci * even number. 6669bf215546Sopenharmony_ci */ 6670bf215546Sopenharmony_ci assert(batch->ice->urb.nr_clip_entries % 2 == 0); 6671bf215546Sopenharmony_ci 6672bf215546Sopenharmony_ci /* Although up to 16 concurrent Clip threads are allowed on Ironlake, 6673bf215546Sopenharmony_ci * only 2 threads can output VUEs at a time. 6674bf215546Sopenharmony_ci */ 6675bf215546Sopenharmony_ci clip.MaximumNumberofThreads = (GFX_VER == 5 ? 
16 : 2) - 1; 6676bf215546Sopenharmony_ci } else { 6677bf215546Sopenharmony_ci assert(batch->ice->urb.nr_clip_entries >= 5); 6678bf215546Sopenharmony_ci clip.MaximumNumberofThreads = 1 - 1; 6679bf215546Sopenharmony_ci } 6680bf215546Sopenharmony_ci clip.VertexPositionSpace = VPOS_NDCSPACE; 6681bf215546Sopenharmony_ci clip.UserClipFlagsMustClipEnable = true; 6682bf215546Sopenharmony_ci clip.GuardbandClipTestEnable = true; 6683bf215546Sopenharmony_ci 6684bf215546Sopenharmony_ci clip.ClipperViewportStatePointer = ro_bo(batch->state.bo, ice->state.clip_vp_address); 6685bf215546Sopenharmony_ci clip.ScreenSpaceViewportXMin = -1.0; 6686bf215546Sopenharmony_ci clip.ScreenSpaceViewportXMax = 1.0; 6687bf215546Sopenharmony_ci clip.ScreenSpaceViewportYMin = -1.0; 6688bf215546Sopenharmony_ci clip.ScreenSpaceViewportYMax = 1.0; 6689bf215546Sopenharmony_ci clip.ViewportXYClipTestEnable = true; 6690bf215546Sopenharmony_ci clip.ViewportZClipTestEnable = (cso_state->depth_clip_near || cso_state->depth_clip_far); 6691bf215546Sopenharmony_ci 6692bf215546Sopenharmony_ci#if GFX_VER == 5 || GFX_VERx10 == 45 6693bf215546Sopenharmony_ci clip.UserClipDistanceClipTestEnableBitmask = cso_state->clip_plane_enable; 6694bf215546Sopenharmony_ci#else 6695bf215546Sopenharmony_ci /* Up to 6 actual clip flags, plus the 7th for the negative RHW 6696bf215546Sopenharmony_ci * workaround. 6697bf215546Sopenharmony_ci */ 6698bf215546Sopenharmony_ci clip.UserClipDistanceClipTestEnableBitmask = (cso_state->clip_plane_enable & 0x3f) | 0x40; 6699bf215546Sopenharmony_ci#endif 6700bf215546Sopenharmony_ci 6701bf215546Sopenharmony_ci clip.APIMode = cso_state->clip_halfz ? 
APIMODE_D3D : APIMODE_OGL; 6702bf215546Sopenharmony_ci clip.GuardbandClipTestEnable = true; 6703bf215546Sopenharmony_ci 6704bf215546Sopenharmony_ci clip.ClipMode = clip_prog_data->clip_mode; 6705bf215546Sopenharmony_ci#if GFX_VERx10 == 45 6706bf215546Sopenharmony_ci clip.NegativeWClipTestEnable = true; 6707bf215546Sopenharmony_ci#endif 6708bf215546Sopenharmony_ci } 6709bf215546Sopenharmony_ci 6710bf215546Sopenharmony_ci#else //if GFX_VER >= 6 6711bf215546Sopenharmony_ci struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast; 6712bf215546Sopenharmony_ci const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data ); 6713bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; 6714bf215546Sopenharmony_ci bool gs_or_tes = ice->shaders.prog[MESA_SHADER_GEOMETRY] || 6715bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_TESS_EVAL]; 6716bf215546Sopenharmony_ci bool points_or_lines = cso_rast->fill_mode_point_or_line || 6717bf215546Sopenharmony_ci (gs_or_tes ? 
ice->shaders.output_topology_is_points_or_lines 6718bf215546Sopenharmony_ci : ice->state.prim_is_points_or_lines); 6719bf215546Sopenharmony_ci uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)]; 6720bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) { 6721bf215546Sopenharmony_ci cl.StatisticsEnable = ice->state.statistics_counters_enabled; 6722bf215546Sopenharmony_ci if (cso_rast->cso.rasterizer_discard) 6723bf215546Sopenharmony_ci cl.ClipMode = CLIPMODE_REJECT_ALL; 6724bf215546Sopenharmony_ci else if (ice->state.window_space_position) 6725bf215546Sopenharmony_ci cl.ClipMode = CLIPMODE_ACCEPT_ALL; 6726bf215546Sopenharmony_ci else 6727bf215546Sopenharmony_ci cl.ClipMode = CLIPMODE_NORMAL; 6728bf215546Sopenharmony_ci 6729bf215546Sopenharmony_ci cl.PerspectiveDivideDisable = ice->state.window_space_position; 6730bf215546Sopenharmony_ci cl.ViewportXYClipTestEnable = !points_or_lines; 6731bf215546Sopenharmony_ci 6732bf215546Sopenharmony_ci cl.UserClipDistanceCullTestEnableBitmask = 6733bf215546Sopenharmony_ci brw_vue_prog_data(ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data)->cull_distance_mask; 6734bf215546Sopenharmony_ci 6735bf215546Sopenharmony_ci cl.NonPerspectiveBarycentricEnable = wm_prog_data->uses_nonperspective_interp_modes; 6736bf215546Sopenharmony_ci 6737bf215546Sopenharmony_ci cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1; 6738bf215546Sopenharmony_ci cl.MaximumVPIndex = ice->state.num_viewports - 1; 6739bf215546Sopenharmony_ci } 6740bf215546Sopenharmony_ci crocus_emit_merge(batch, cso_rast->clip, dynamic_clip, 6741bf215546Sopenharmony_ci ARRAY_SIZE(cso_rast->clip)); 6742bf215546Sopenharmony_ci#endif 6743bf215546Sopenharmony_ci } 6744bf215546Sopenharmony_ci 6745bf215546Sopenharmony_ci if (stage_dirty & CROCUS_STAGE_DIRTY_VS) { 6746bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_VERTEX]; 6747bf215546Sopenharmony_ci const struct brw_vue_prog_data *vue_prog_data = 
brw_vue_prog_data(shader->prog_data); 6748bf215546Sopenharmony_ci const struct brw_stage_prog_data *prog_data = &vue_prog_data->base; 6749bf215546Sopenharmony_ci#if GFX_VER == 7 6750bf215546Sopenharmony_ci if (batch->screen->devinfo.platform == INTEL_PLATFORM_IVB) 6751bf215546Sopenharmony_ci gen7_emit_vs_workaround_flush(batch); 6752bf215546Sopenharmony_ci#endif 6753bf215546Sopenharmony_ci 6754bf215546Sopenharmony_ci 6755bf215546Sopenharmony_ci#if GFX_VER == 6 6756bf215546Sopenharmony_ci struct push_bos push_bos = {}; 6757bf215546Sopenharmony_ci setup_constant_buffers(ice, batch, MESA_SHADER_VERTEX, &push_bos); 6758bf215546Sopenharmony_ci 6759bf215546Sopenharmony_ci emit_push_constant_packets(ice, batch, MESA_SHADER_VERTEX, &push_bos); 6760bf215546Sopenharmony_ci#endif 6761bf215546Sopenharmony_ci#if GFX_VER >= 6 6762bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VS), vs) 6763bf215546Sopenharmony_ci#else 6764bf215546Sopenharmony_ci uint32_t *vs_ptr = stream_state(batch, 6765bf215546Sopenharmony_ci GENX(VS_STATE_length) * 4, 32, &ice->shaders.vs_offset); 6766bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS; 6767bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(VS_STATE), vs_ptr, vs) 6768bf215546Sopenharmony_ci#endif 6769bf215546Sopenharmony_ci { 6770bf215546Sopenharmony_ci INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX); 6771bf215546Sopenharmony_ci 6772bf215546Sopenharmony_ci vs.MaximumNumberofThreads = batch->screen->devinfo.max_vs_threads - 1; 6773bf215546Sopenharmony_ci 6774bf215546Sopenharmony_ci#if GFX_VER < 6 6775bf215546Sopenharmony_ci vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1; 6776bf215546Sopenharmony_ci vs.ConstantURBEntryReadLength = vue_prog_data->base.curb_read_length; 6777bf215546Sopenharmony_ci vs.ConstantURBEntryReadOffset = ice->curbe.vs_start * 2; 6778bf215546Sopenharmony_ci 6779bf215546Sopenharmony_ci vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries >> (GFX_VER == 5 ? 
2 : 0); 6780bf215546Sopenharmony_ci vs.URBEntryAllocationSize = batch->ice->urb.vsize - 1; 6781bf215546Sopenharmony_ci 6782bf215546Sopenharmony_ci vs.MaximumNumberofThreads = 6783bf215546Sopenharmony_ci CLAMP(batch->ice->urb.nr_vs_entries / 2, 1, batch->screen->devinfo.max_vs_threads) - 1; 6784bf215546Sopenharmony_ci vs.StatisticsEnable = false; 6785bf215546Sopenharmony_ci vs.SamplerStatePointer = ro_bo(batch->state.bo, ice->state.shaders[MESA_SHADER_VERTEX].sampler_offset); 6786bf215546Sopenharmony_ci#endif 6787bf215546Sopenharmony_ci#if GFX_VER == 5 6788bf215546Sopenharmony_ci /* Force single program flow on Ironlake. We cannot reliably get 6789bf215546Sopenharmony_ci * all applications working without it. See: 6790bf215546Sopenharmony_ci * https://bugs.freedesktop.org/show_bug.cgi?id=29172 6791bf215546Sopenharmony_ci * 6792bf215546Sopenharmony_ci * The most notable and reliably failing application is the Humus 6793bf215546Sopenharmony_ci * demo "CelShading" 6794bf215546Sopenharmony_ci */ 6795bf215546Sopenharmony_ci vs.SingleProgramFlow = true; 6796bf215546Sopenharmony_ci vs.SamplerCount = 0; /* hardware requirement */ 6797bf215546Sopenharmony_ci 6798bf215546Sopenharmony_ci#endif 6799bf215546Sopenharmony_ci#if GFX_VER >= 8 6800bf215546Sopenharmony_ci vs.SIMD8DispatchEnable = 6801bf215546Sopenharmony_ci vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; 6802bf215546Sopenharmony_ci 6803bf215546Sopenharmony_ci vs.UserClipDistanceCullTestEnableBitmask = 6804bf215546Sopenharmony_ci vue_prog_data->cull_distance_mask; 6805bf215546Sopenharmony_ci#endif 6806bf215546Sopenharmony_ci } 6807bf215546Sopenharmony_ci 6808bf215546Sopenharmony_ci#if GFX_VER == 6 6809bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, 6810bf215546Sopenharmony_ci "post VS const", 6811bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_STALL | 6812bf215546Sopenharmony_ci PIPE_CONTROL_INSTRUCTION_INVALIDATE | 6813bf215546Sopenharmony_ci PIPE_CONTROL_STATE_CACHE_INVALIDATE); 
6814bf215546Sopenharmony_ci#endif 6815bf215546Sopenharmony_ci } 6816bf215546Sopenharmony_ci 6817bf215546Sopenharmony_ci if (stage_dirty & CROCUS_STAGE_DIRTY_GS) { 6818bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_GEOMETRY]; 6819bf215546Sopenharmony_ci bool active = GFX_VER >= 6 && shader; 6820bf215546Sopenharmony_ci#if GFX_VER == 6 6821bf215546Sopenharmony_ci struct push_bos push_bos = {}; 6822bf215546Sopenharmony_ci if (shader) 6823bf215546Sopenharmony_ci setup_constant_buffers(ice, batch, MESA_SHADER_GEOMETRY, &push_bos); 6824bf215546Sopenharmony_ci 6825bf215546Sopenharmony_ci emit_push_constant_packets(ice, batch, MESA_SHADER_GEOMETRY, &push_bos); 6826bf215546Sopenharmony_ci#endif 6827bf215546Sopenharmony_ci#if GFX_VERx10 == 70 6828bf215546Sopenharmony_ci /** 6829bf215546Sopenharmony_ci * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > 6830bf215546Sopenharmony_ci * Geometry > Geometry Shader > State: 6831bf215546Sopenharmony_ci * 6832bf215546Sopenharmony_ci * "Note: Because of corruption in IVB:GT2, software needs to flush the 6833bf215546Sopenharmony_ci * whole fixed function pipeline when the GS enable changes value in 6834bf215546Sopenharmony_ci * the 3DSTATE_GS." 6835bf215546Sopenharmony_ci * 6836bf215546Sopenharmony_ci * The hardware architects have clarified that in this context "flush the 6837bf215546Sopenharmony_ci * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS 6838bf215546Sopenharmony_ci * Stall" bit set. 
6839bf215546Sopenharmony_ci */ 6840bf215546Sopenharmony_ci if (batch->screen->devinfo.gt == 2 && ice->state.gs_enabled != active) 6841bf215546Sopenharmony_ci gen7_emit_cs_stall_flush(batch); 6842bf215546Sopenharmony_ci#endif 6843bf215546Sopenharmony_ci#if GFX_VER >= 6 6844bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_GS), gs) 6845bf215546Sopenharmony_ci#else 6846bf215546Sopenharmony_ci uint32_t *gs_ptr = stream_state(batch, 6847bf215546Sopenharmony_ci GENX(GS_STATE_length) * 4, 32, &ice->shaders.gs_offset); 6848bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS; 6849bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(GS_STATE), gs_ptr, gs) 6850bf215546Sopenharmony_ci#endif 6851bf215546Sopenharmony_ci { 6852bf215546Sopenharmony_ci#if GFX_VER >= 6 6853bf215546Sopenharmony_ci if (active) { 6854bf215546Sopenharmony_ci const struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(shader->prog_data); 6855bf215546Sopenharmony_ci const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data); 6856bf215546Sopenharmony_ci const struct brw_stage_prog_data *prog_data = &gs_prog_data->base.base; 6857bf215546Sopenharmony_ci 6858bf215546Sopenharmony_ci INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY); 6859bf215546Sopenharmony_ci#if GFX_VER >= 7 6860bf215546Sopenharmony_ci gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; 6861bf215546Sopenharmony_ci gs.OutputTopology = gs_prog_data->output_topology; 6862bf215546Sopenharmony_ci gs.ControlDataHeaderSize = 6863bf215546Sopenharmony_ci gs_prog_data->control_data_header_size_hwords; 6864bf215546Sopenharmony_ci 6865bf215546Sopenharmony_ci gs.InstanceControl = gs_prog_data->invocations - 1; 6866bf215546Sopenharmony_ci gs.DispatchMode = vue_prog_data->dispatch_mode; 6867bf215546Sopenharmony_ci 6868bf215546Sopenharmony_ci gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; 6869bf215546Sopenharmony_ci 6870bf215546Sopenharmony_ci 
gs.ControlDataFormat = gs_prog_data->control_data_format; 6871bf215546Sopenharmony_ci#endif 6872bf215546Sopenharmony_ci 6873bf215546Sopenharmony_ci /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between 6874bf215546Sopenharmony_ci * Ivy Bridge and Haswell. 6875bf215546Sopenharmony_ci * 6876bf215546Sopenharmony_ci * On Ivy Bridge, setting this bit causes the vertices of a triangle 6877bf215546Sopenharmony_ci * strip to be delivered to the geometry shader in an order that does 6878bf215546Sopenharmony_ci * not strictly follow the OpenGL spec, but preserves triangle 6879bf215546Sopenharmony_ci * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then 6880bf215546Sopenharmony_ci * the geometry shader sees triangles: 6881bf215546Sopenharmony_ci * 6882bf215546Sopenharmony_ci * (1, 2, 3), (2, 4, 3), (3, 4, 5) 6883bf215546Sopenharmony_ci * 6884bf215546Sopenharmony_ci * (Clearing the bit is even worse, because it fails to preserve 6885bf215546Sopenharmony_ci * orientation). 6886bf215546Sopenharmony_ci * 6887bf215546Sopenharmony_ci * Triangle strips with adjacency are always ordered in a way that preserves 6888bf215546Sopenharmony_ci * triangle orientation but does not strictly follow the OpenGL spec, 6889bf215546Sopenharmony_ci * regardless of the setting of this bit. 6890bf215546Sopenharmony_ci * 6891bf215546Sopenharmony_ci * On Haswell, both triangle strips and triangle strips with adjacency 6892bf215546Sopenharmony_ci * are always ordered in a way that preserves triangle orientation. 6893bf215546Sopenharmony_ci * Setting this bit causes the ordering to strictly follow the OpenGL 6894bf215546Sopenharmony_ci * spec. 6895bf215546Sopenharmony_ci * 6896bf215546Sopenharmony_ci * So in either case we want to set the bit. Unfortunately on Ivy 6897bf215546Sopenharmony_ci * Bridge this will get the order close to correct but not perfect.
6898bf215546Sopenharmony_ci */ 6899bf215546Sopenharmony_ci gs.ReorderMode = TRAILING; 6900bf215546Sopenharmony_ci gs.MaximumNumberofThreads = 6901bf215546Sopenharmony_ci GFX_VER == 8 ? (batch->screen->devinfo.max_gs_threads / 2 - 1) : 6902bf215546Sopenharmony_ci (batch->screen->devinfo.max_gs_threads - 1); 6903bf215546Sopenharmony_ci#if GFX_VER < 7 6904bf215546Sopenharmony_ci gs.SOStatisticsEnable = true; 6905bf215546Sopenharmony_ci if (gs_prog_data->num_transform_feedback_bindings) 6906bf215546Sopenharmony_ci gs.SVBIPayloadEnable = ice->state.streamout_active; 6907bf215546Sopenharmony_ci 6908bf215546Sopenharmony_ci /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it 6909bf215546Sopenharmony_ci * was previously done for gen6. 6910bf215546Sopenharmony_ci * 6911bf215546Sopenharmony_ci * TODO: test with both disabled to see if the HW is behaving 6912bf215546Sopenharmony_ci * as expected, like in gen7. 6913bf215546Sopenharmony_ci */ 6914bf215546Sopenharmony_ci gs.SingleProgramFlow = true; 6915bf215546Sopenharmony_ci gs.VectorMaskEnable = true; 6916bf215546Sopenharmony_ci#endif 6917bf215546Sopenharmony_ci#if GFX_VER >= 8 6918bf215546Sopenharmony_ci gs.ExpectedVertexCount = gs_prog_data->vertices_in; 6919bf215546Sopenharmony_ci 6920bf215546Sopenharmony_ci if (gs_prog_data->static_vertex_count != -1) { 6921bf215546Sopenharmony_ci gs.StaticOutput = true; 6922bf215546Sopenharmony_ci gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; 6923bf215546Sopenharmony_ci } 6924bf215546Sopenharmony_ci gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; 6925bf215546Sopenharmony_ci 6926bf215546Sopenharmony_ci gs.UserClipDistanceCullTestEnableBitmask = 6927bf215546Sopenharmony_ci vue_prog_data->cull_distance_mask; 6928bf215546Sopenharmony_ci 6929bf215546Sopenharmony_ci const int urb_entry_write_offset = 1; 6930bf215546Sopenharmony_ci const uint32_t urb_entry_output_length = 6931bf215546Sopenharmony_ci DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 
2) - 6932bf215546Sopenharmony_ci urb_entry_write_offset; 6933bf215546Sopenharmony_ci 6934bf215546Sopenharmony_ci gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; 6935bf215546Sopenharmony_ci gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); 6936bf215546Sopenharmony_ci#endif 6937bf215546Sopenharmony_ci } 6938bf215546Sopenharmony_ci#endif 6939bf215546Sopenharmony_ci#if GFX_VER <= 6 6940bf215546Sopenharmony_ci if (!active && ice->shaders.ff_gs_prog) { 6941bf215546Sopenharmony_ci const struct brw_ff_gs_prog_data *gs_prog_data = (struct brw_ff_gs_prog_data *)ice->shaders.ff_gs_prog->prog_data; 6942bf215546Sopenharmony_ci /* In gen6, transform feedback for the VS stage is done with an 6943bf215546Sopenharmony_ci * ad-hoc GS program. This function provides the needed 3DSTATE_GS 6944bf215546Sopenharmony_ci * for this. 6945bf215546Sopenharmony_ci */ 6946bf215546Sopenharmony_ci gs.KernelStartPointer = KSP(ice, ice->shaders.ff_gs_prog); 6947bf215546Sopenharmony_ci gs.SingleProgramFlow = true; 6948bf215546Sopenharmony_ci gs.DispatchGRFStartRegisterForURBData = GFX_VER == 6 ? 2 : 1; 6949bf215546Sopenharmony_ci gs.VertexURBEntryReadLength = gs_prog_data->urb_read_length; 6950bf215546Sopenharmony_ci 6951bf215546Sopenharmony_ci#if GFX_VER <= 5 6952bf215546Sopenharmony_ci gs.GRFRegisterCount = 6953bf215546Sopenharmony_ci DIV_ROUND_UP(gs_prog_data->total_grf, 16) - 1; 6954bf215546Sopenharmony_ci /* BRW_NEW_URB_FENCE */ 6955bf215546Sopenharmony_ci gs.NumberofURBEntries = batch->ice->urb.nr_gs_entries; 6956bf215546Sopenharmony_ci gs.URBEntryAllocationSize = batch->ice->urb.vsize - 1; 6957bf215546Sopenharmony_ci gs.MaximumNumberofThreads = batch->ice->urb.nr_gs_entries >= 8 ? 
1 : 0; 6958bf215546Sopenharmony_ci gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 6959bf215546Sopenharmony_ci#else 6960bf215546Sopenharmony_ci gs.Enable = true; 6961bf215546Sopenharmony_ci gs.VectorMaskEnable = true; 6962bf215546Sopenharmony_ci gs.SVBIPayloadEnable = true; 6963bf215546Sopenharmony_ci gs.SVBIPostIncrementEnable = true; 6964bf215546Sopenharmony_ci gs.SVBIPostIncrementValue = gs_prog_data->svbi_postincrement_value; 6965bf215546Sopenharmony_ci gs.SOStatisticsEnable = true; 6966bf215546Sopenharmony_ci gs.MaximumNumberofThreads = batch->screen->devinfo.max_gs_threads - 1; 6967bf215546Sopenharmony_ci#endif 6968bf215546Sopenharmony_ci } 6969bf215546Sopenharmony_ci#endif 6970bf215546Sopenharmony_ci if (!active && !ice->shaders.ff_gs_prog) { 6971bf215546Sopenharmony_ci#if GFX_VER < 8 6972bf215546Sopenharmony_ci gs.DispatchGRFStartRegisterForURBData = 1; 6973bf215546Sopenharmony_ci#if GFX_VER >= 7 6974bf215546Sopenharmony_ci gs.IncludeVertexHandles = true; 6975bf215546Sopenharmony_ci#endif 6976bf215546Sopenharmony_ci#endif 6977bf215546Sopenharmony_ci } 6978bf215546Sopenharmony_ci#if GFX_VER >= 6 6979bf215546Sopenharmony_ci gs.StatisticsEnable = true; 6980bf215546Sopenharmony_ci#endif 6981bf215546Sopenharmony_ci#if GFX_VER == 5 || GFX_VER == 6 6982bf215546Sopenharmony_ci gs.RenderingEnabled = true; 6983bf215546Sopenharmony_ci#endif 6984bf215546Sopenharmony_ci#if GFX_VER <= 5 6985bf215546Sopenharmony_ci gs.MaximumVPIndex = ice->state.num_viewports - 1; 6986bf215546Sopenharmony_ci#endif 6987bf215546Sopenharmony_ci } 6988bf215546Sopenharmony_ci ice->state.gs_enabled = active; 6989bf215546Sopenharmony_ci } 6990bf215546Sopenharmony_ci 6991bf215546Sopenharmony_ci#if GFX_VER >= 7 6992bf215546Sopenharmony_ci if (stage_dirty & CROCUS_STAGE_DIRTY_TCS) { 6993bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_CTRL]; 6994bf215546Sopenharmony_ci 6995bf215546Sopenharmony_ci if (shader) { 6996bf215546Sopenharmony_ci const 
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(shader->prog_data); 6997bf215546Sopenharmony_ci const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data); 6998bf215546Sopenharmony_ci const struct brw_stage_prog_data *prog_data = &tcs_prog_data->base.base; 6999bf215546Sopenharmony_ci 7000bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_HS), hs) { 7001bf215546Sopenharmony_ci INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL); 7002bf215546Sopenharmony_ci hs.InstanceCount = tcs_prog_data->instances - 1; 7003bf215546Sopenharmony_ci hs.IncludeVertexHandles = true; 7004bf215546Sopenharmony_ci hs.MaximumNumberofThreads = batch->screen->devinfo.max_tcs_threads - 1; 7005bf215546Sopenharmony_ci } 7006bf215546Sopenharmony_ci } else { 7007bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_HS), hs); 7008bf215546Sopenharmony_ci } 7009bf215546Sopenharmony_ci 7010bf215546Sopenharmony_ci } 7011bf215546Sopenharmony_ci 7012bf215546Sopenharmony_ci if (stage_dirty & CROCUS_STAGE_DIRTY_TES) { 7013bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_EVAL]; 7014bf215546Sopenharmony_ci if (shader) { 7015bf215546Sopenharmony_ci const struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(shader->prog_data); 7016bf215546Sopenharmony_ci const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data); 7017bf215546Sopenharmony_ci const struct brw_stage_prog_data *prog_data = &tes_prog_data->base.base; 7018bf215546Sopenharmony_ci 7019bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_TE), te) { 7020bf215546Sopenharmony_ci te.Partitioning = tes_prog_data->partitioning; 7021bf215546Sopenharmony_ci te.OutputTopology = tes_prog_data->output_topology; 7022bf215546Sopenharmony_ci te.TEDomain = tes_prog_data->domain; 7023bf215546Sopenharmony_ci te.TEEnable = true; 7024bf215546Sopenharmony_ci te.MaximumTessellationFactorOdd = 63.0; 
7025bf215546Sopenharmony_ci te.MaximumTessellationFactorNotOdd = 64.0; 7026bf215546Sopenharmony_ci }; 7027bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds) { 7028bf215546Sopenharmony_ci INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL); 7029bf215546Sopenharmony_ci 7030bf215546Sopenharmony_ci ds.MaximumNumberofThreads = batch->screen->devinfo.max_tes_threads - 1; 7031bf215546Sopenharmony_ci ds.ComputeWCoordinateEnable = 7032bf215546Sopenharmony_ci tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; 7033bf215546Sopenharmony_ci 7034bf215546Sopenharmony_ci#if GFX_VER >= 8 7035bf215546Sopenharmony_ci if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8) 7036bf215546Sopenharmony_ci ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; 7037bf215546Sopenharmony_ci ds.UserClipDistanceCullTestEnableBitmask = 7038bf215546Sopenharmony_ci vue_prog_data->cull_distance_mask; 7039bf215546Sopenharmony_ci#endif 7040bf215546Sopenharmony_ci }; 7041bf215546Sopenharmony_ci } else { 7042bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_TE), te); 7043bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds); 7044bf215546Sopenharmony_ci } 7045bf215546Sopenharmony_ci } 7046bf215546Sopenharmony_ci#endif 7047bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_RASTER) { 7048bf215546Sopenharmony_ci 7049bf215546Sopenharmony_ci#if GFX_VER < 6 7050bf215546Sopenharmony_ci const struct brw_sf_prog_data *sf_prog_data = (struct brw_sf_prog_data *)ice->shaders.sf_prog->prog_data; 7051bf215546Sopenharmony_ci struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso; 7052bf215546Sopenharmony_ci uint32_t *sf_ptr = stream_state(batch, 7053bf215546Sopenharmony_ci GENX(SF_STATE_length) * 4, 32, &ice->shaders.sf_offset); 7054bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS; 7055bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(SF_STATE), sf_ptr, sf) { 7056bf215546Sopenharmony_ci sf.KernelStartPointer = KSP(ice, 
ice->shaders.sf_prog); 7057bf215546Sopenharmony_ci sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 7058bf215546Sopenharmony_ci sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1; 7059bf215546Sopenharmony_ci sf.DispatchGRFStartRegisterForURBData = 3; 7060bf215546Sopenharmony_ci sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; 7061bf215546Sopenharmony_ci sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length; 7062bf215546Sopenharmony_ci sf.URBEntryAllocationSize = batch->ice->urb.sfsize - 1; 7063bf215546Sopenharmony_ci sf.NumberofURBEntries = batch->ice->urb.nr_sf_entries; 7064bf215546Sopenharmony_ci sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT; 7065bf215546Sopenharmony_ci 7066bf215546Sopenharmony_ci sf.SetupViewportStateOffset = ro_bo(batch->state.bo, ice->state.sf_vp_address); 7067bf215546Sopenharmony_ci 7068bf215546Sopenharmony_ci sf.MaximumNumberofThreads = 7069bf215546Sopenharmony_ci MIN2(GFX_VER == 5 ? 48 : 24, batch->ice->urb.nr_sf_entries) - 1; 7070bf215546Sopenharmony_ci 7071bf215546Sopenharmony_ci sf.SpritePointEnable = cso_state->point_quad_rasterization; 7072bf215546Sopenharmony_ci sf.DestinationOriginHorizontalBias = 0.5; 7073bf215546Sopenharmony_ci sf.DestinationOriginVerticalBias = 0.5; 7074bf215546Sopenharmony_ci 7075bf215546Sopenharmony_ci sf.LineEndCapAntialiasingRegionWidth = 7076bf215546Sopenharmony_ci cso_state->line_smooth ? _10pixels : _05pixels; 7077bf215546Sopenharmony_ci sf.LastPixelEnable = cso_state->line_last_pixel; 7078bf215546Sopenharmony_ci sf.AntialiasingEnable = cso_state->line_smooth; 7079bf215546Sopenharmony_ci 7080bf215546Sopenharmony_ci sf.LineWidth = get_line_width(cso_state); 7081bf215546Sopenharmony_ci sf.PointWidth = cso_state->point_size; 7082bf215546Sopenharmony_ci sf.PointWidthSource = cso_state->point_size_per_vertex ? 
Vertex : State; 7083bf215546Sopenharmony_ci#if GFX_VERx10 >= 45 7084bf215546Sopenharmony_ci sf.AALineDistanceMode = AALINEDISTANCE_TRUE; 7085bf215546Sopenharmony_ci#endif 7086bf215546Sopenharmony_ci sf.ViewportTransformEnable = true; 7087bf215546Sopenharmony_ci sf.FrontWinding = cso_state->front_ccw ? 1 : 0; 7088bf215546Sopenharmony_ci sf.ScissorRectangleEnable = true; 7089bf215546Sopenharmony_ci sf.CullMode = translate_cull_mode(cso_state->cull_face); 7090bf215546Sopenharmony_ci 7091bf215546Sopenharmony_ci if (cso_state->flatshade_first) { 7092bf215546Sopenharmony_ci sf.TriangleFanProvokingVertexSelect = 1; 7093bf215546Sopenharmony_ci } else { 7094bf215546Sopenharmony_ci sf.TriangleStripListProvokingVertexSelect = 2; 7095bf215546Sopenharmony_ci sf.TriangleFanProvokingVertexSelect = 2; 7096bf215546Sopenharmony_ci sf.LineStripListProvokingVertexSelect = 1; 7097bf215546Sopenharmony_ci } 7098bf215546Sopenharmony_ci } 7099bf215546Sopenharmony_ci#else 7100bf215546Sopenharmony_ci struct crocus_rasterizer_state *cso = ice->state.cso_rast; 7101bf215546Sopenharmony_ci uint32_t dynamic_sf[GENX(3DSTATE_SF_length)]; 7102bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_SF), &dynamic_sf, sf) { 7103bf215546Sopenharmony_ci sf.ViewportTransformEnable = !ice->state.window_space_position; 7104bf215546Sopenharmony_ci 7105bf215546Sopenharmony_ci#if GFX_VER == 6 7106bf215546Sopenharmony_ci const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data); 7107bf215546Sopenharmony_ci uint32_t urb_entry_read_length; 7108bf215546Sopenharmony_ci uint32_t urb_entry_read_offset; 7109bf215546Sopenharmony_ci uint32_t point_sprite_enables; 7110bf215546Sopenharmony_ci calculate_attr_overrides(ice, sf.Attribute, &point_sprite_enables, 7111bf215546Sopenharmony_ci &urb_entry_read_length, 7112bf215546Sopenharmony_ci &urb_entry_read_offset); 7113bf215546Sopenharmony_ci sf.VertexURBEntryReadLength = urb_entry_read_length; 
7114bf215546Sopenharmony_ci sf.VertexURBEntryReadOffset = urb_entry_read_offset; 7115bf215546Sopenharmony_ci sf.PointSpriteTextureCoordinateEnable = point_sprite_enables; 7116bf215546Sopenharmony_ci sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs; 7117bf215546Sopenharmony_ci sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 7118bf215546Sopenharmony_ci#endif 7119bf215546Sopenharmony_ci 7120bf215546Sopenharmony_ci#if GFX_VER >= 6 && GFX_VER < 8 7121bf215546Sopenharmony_ci if (ice->state.framebuffer.samples > 1 && ice->state.cso_rast->cso.multisample) 7122bf215546Sopenharmony_ci sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 7123bf215546Sopenharmony_ci#endif 7124bf215546Sopenharmony_ci#if GFX_VER == 7 7125bf215546Sopenharmony_ci if (ice->state.framebuffer.zsbuf) { 7126bf215546Sopenharmony_ci struct crocus_resource *zres, *sres; 7127bf215546Sopenharmony_ci crocus_get_depth_stencil_resources(&batch->screen->devinfo, 7128bf215546Sopenharmony_ci ice->state.framebuffer.zsbuf->texture, 7129bf215546Sopenharmony_ci &zres, &sres); 7130bf215546Sopenharmony_ci /* ANV thinks that the stencil-ness doesn't matter, this is just 7131bf215546Sopenharmony_ci * about handling polygon offset scaling. 7132bf215546Sopenharmony_ci */ 7133bf215546Sopenharmony_ci sf.DepthBufferSurfaceFormat = zres ? 
isl_format_get_depth_format(zres->surf.format, false) : D16_UNORM; 7134bf215546Sopenharmony_ci } 7135bf215546Sopenharmony_ci#endif 7136bf215546Sopenharmony_ci } 7137bf215546Sopenharmony_ci crocus_emit_merge(batch, cso->sf, dynamic_sf, 7138bf215546Sopenharmony_ci ARRAY_SIZE(dynamic_sf)); 7139bf215546Sopenharmony_ci#if GFX_VER == 8 7140bf215546Sopenharmony_ci crocus_batch_emit(batch, cso->raster, sizeof(cso->raster)); 7141bf215546Sopenharmony_ci#endif 7142bf215546Sopenharmony_ci#endif 7143bf215546Sopenharmony_ci } 7144bf215546Sopenharmony_ci 7145bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_WM) { 7146bf215546Sopenharmony_ci struct crocus_rasterizer_state *cso = ice->state.cso_rast; 7147bf215546Sopenharmony_ci const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data); 7148bf215546Sopenharmony_ci UNUSED bool writes_depth = wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; 7149bf215546Sopenharmony_ci UNUSED const struct shader_info *fs_info = 7150bf215546Sopenharmony_ci crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT); 7151bf215546Sopenharmony_ci 7152bf215546Sopenharmony_ci#if GFX_VER == 6 7153bf215546Sopenharmony_ci struct push_bos push_bos = {}; 7154bf215546Sopenharmony_ci setup_constant_buffers(ice, batch, MESA_SHADER_FRAGMENT, &push_bos); 7155bf215546Sopenharmony_ci 7156bf215546Sopenharmony_ci emit_push_constant_packets(ice, batch, MESA_SHADER_FRAGMENT, &push_bos); 7157bf215546Sopenharmony_ci#endif 7158bf215546Sopenharmony_ci#if GFX_VER >= 6 7159bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_WM), wm) 7160bf215546Sopenharmony_ci#else 7161bf215546Sopenharmony_ci uint32_t *wm_ptr = stream_state(batch, 7162bf215546Sopenharmony_ci GENX(WM_STATE_length) * 4, 32, &ice->shaders.wm_offset); 7163bf215546Sopenharmony_ci 7164bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS; 7165bf215546Sopenharmony_ci 7166bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(WM_STATE), wm_ptr, 
wm) 7167bf215546Sopenharmony_ci#endif 7168bf215546Sopenharmony_ci { 7169bf215546Sopenharmony_ci#if GFX_VER <= 6 7170bf215546Sopenharmony_ci wm._8PixelDispatchEnable = wm_prog_data->dispatch_8; 7171bf215546Sopenharmony_ci wm._16PixelDispatchEnable = wm_prog_data->dispatch_16; 7172bf215546Sopenharmony_ci wm._32PixelDispatchEnable = wm_prog_data->dispatch_32; 7173bf215546Sopenharmony_ci#endif 7174bf215546Sopenharmony_ci#if GFX_VER == 4 7175bf215546Sopenharmony_ci /* On gen4, we only have one shader kernel */ 7176bf215546Sopenharmony_ci if (brw_wm_state_has_ksp(wm, 0)) { 7177bf215546Sopenharmony_ci wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]); 7178bf215546Sopenharmony_ci wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0); 7179bf215546Sopenharmony_ci wm.DispatchGRFStartRegisterForConstantSetupData0 = 7180bf215546Sopenharmony_ci wm_prog_data->base.dispatch_grf_start_reg; 7181bf215546Sopenharmony_ci } 7182bf215546Sopenharmony_ci#elif GFX_VER == 5 7183bf215546Sopenharmony_ci wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) + 7184bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0); 7185bf215546Sopenharmony_ci wm.KernelStartPointer1 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) + 7186bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1); 7187bf215546Sopenharmony_ci wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) + 7188bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2); 7189bf215546Sopenharmony_ci 7190bf215546Sopenharmony_ci wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0); 7191bf215546Sopenharmony_ci wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1); 7192bf215546Sopenharmony_ci wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2); 7193bf215546Sopenharmony_ci 7194bf215546Sopenharmony_ci wm.DispatchGRFStartRegisterForConstantSetupData0 = 
7195bf215546Sopenharmony_ci wm_prog_data->base.dispatch_grf_start_reg; 7196bf215546Sopenharmony_ci#elif GFX_VER == 6 7197bf215546Sopenharmony_ci wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) + 7198bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0); 7199bf215546Sopenharmony_ci wm.KernelStartPointer1 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) + 7200bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1); 7201bf215546Sopenharmony_ci wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) + 7202bf215546Sopenharmony_ci brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2); 7203bf215546Sopenharmony_ci 7204bf215546Sopenharmony_ci wm.DispatchGRFStartRegisterForConstantSetupData0 = 7205bf215546Sopenharmony_ci brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0); 7206bf215546Sopenharmony_ci wm.DispatchGRFStartRegisterForConstantSetupData1 = 7207bf215546Sopenharmony_ci brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1); 7208bf215546Sopenharmony_ci wm.DispatchGRFStartRegisterForConstantSetupData2 = 7209bf215546Sopenharmony_ci brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2); 7210bf215546Sopenharmony_ci#endif 7211bf215546Sopenharmony_ci#if GFX_VER <= 5 7212bf215546Sopenharmony_ci wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length; 7213bf215546Sopenharmony_ci wm.ConstantURBEntryReadOffset = ice->curbe.wm_start * 2; 7214bf215546Sopenharmony_ci wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2; 7215bf215546Sopenharmony_ci wm.SetupURBEntryReadOffset = 0; 7216bf215546Sopenharmony_ci wm.EarlyDepthTestEnable = true; 7217bf215546Sopenharmony_ci wm.LineAntialiasingRegionWidth = _05pixels; 7218bf215546Sopenharmony_ci wm.LineEndCapAntialiasingRegionWidth = _10pixels; 7219bf215546Sopenharmony_ci wm.DepthCoefficientURBReadOffset = 1; 7220bf215546Sopenharmony_ci 7221bf215546Sopenharmony_ci if (cso->cso.offset_tri) { 
7222bf215546Sopenharmony_ci wm.GlobalDepthOffsetEnable = true; 7223bf215546Sopenharmony_ci 7224bf215546Sopenharmony_ci /* Something weird going on with legacy_global_depth_bias, 7225bf215546Sopenharmony_ci * offset_constant, scaling and MRD. This value passes glean 7226bf215546Sopenharmony_ci * but gives some odd results elsewhere (eg. the 7227bf215546Sopenharmony_ci * quad-offset-units test). 7228bf215546Sopenharmony_ci */ 7229bf215546Sopenharmony_ci wm.GlobalDepthOffsetConstant = cso->cso.offset_units * 2; 7230bf215546Sopenharmony_ci wm.GlobalDepthOffsetScale = cso->cso.offset_scale; 7231bf215546Sopenharmony_ci } 7232bf215546Sopenharmony_ci wm.SamplerStatePointer = ro_bo(batch->state.bo, 7233bf215546Sopenharmony_ci ice->state.shaders[MESA_SHADER_FRAGMENT].sampler_offset); 7234bf215546Sopenharmony_ci#endif 7235bf215546Sopenharmony_ci 7236bf215546Sopenharmony_ci wm.StatisticsEnable = (GFX_VER >= 6 || ice->state.stats_wm) ? 7237bf215546Sopenharmony_ci ice->state.statistics_counters_enabled : 0; 7238bf215546Sopenharmony_ci 7239bf215546Sopenharmony_ci#if GFX_VER >= 6 7240bf215546Sopenharmony_ci wm.LineAntialiasingRegionWidth = _10pixels; 7241bf215546Sopenharmony_ci wm.LineEndCapAntialiasingRegionWidth = _05pixels; 7242bf215546Sopenharmony_ci 7243bf215546Sopenharmony_ci wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; 7244bf215546Sopenharmony_ci wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes; 7245bf215546Sopenharmony_ci#endif 7246bf215546Sopenharmony_ci#if GFX_VER == 6 7247bf215546Sopenharmony_ci wm.DualSourceBlendEnable = wm_prog_data->dual_src_blend && 7248bf215546Sopenharmony_ci ice->state.cso_blend->dual_color_blending; 7249bf215546Sopenharmony_ci wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; 7250bf215546Sopenharmony_ci wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; 7251bf215546Sopenharmony_ci 7252bf215546Sopenharmony_ci /* From the SNB PRM, volume 2 part 1, page 281: 7253bf215546Sopenharmony_ci * "If the PS
kernel does not need the Position XY Offsets 7254bf215546Sopenharmony_ci * to compute a Position XY value, then this field should be 7255bf215546Sopenharmony_ci * programmed to POSOFFSET_NONE." 7256bf215546Sopenharmony_ci * 7257bf215546Sopenharmony_ci * "SW Recommendation: If the PS kernel needs the Position Offsets 7258bf215546Sopenharmony_ci * to compute a Position XY value, this field should match Position 7259bf215546Sopenharmony_ci * ZW Interpolation Mode to ensure a consistent position.xyzw 7260bf215546Sopenharmony_ci * computation." 7261bf215546Sopenharmony_ci * We only require XY sample offsets. So, this recommendation doesn't 7262bf215546Sopenharmony_ci * look useful at the moment. We might need this in future. 7263bf215546Sopenharmony_ci */ 7264bf215546Sopenharmony_ci if (wm_prog_data->uses_pos_offset) 7265bf215546Sopenharmony_ci wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE; 7266bf215546Sopenharmony_ci else 7267bf215546Sopenharmony_ci wm.PositionXYOffsetSelect = POSOFFSET_NONE; 7268bf215546Sopenharmony_ci#endif 7269bf215546Sopenharmony_ci wm.LineStippleEnable = cso->cso.line_stipple_enable; 7270bf215546Sopenharmony_ci wm.PolygonStippleEnable = cso->cso.poly_stipple_enable; 7271bf215546Sopenharmony_ci 7272bf215546Sopenharmony_ci#if GFX_VER < 7 7273bf215546Sopenharmony_ci if (wm_prog_data->base.use_alt_mode) 7274bf215546Sopenharmony_ci wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate; 7275bf215546Sopenharmony_ci wm.BindingTableEntryCount = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bt.size_bytes / 4; 7276bf215546Sopenharmony_ci wm.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1; 7277bf215546Sopenharmony_ci#endif 7278bf215546Sopenharmony_ci 7279bf215546Sopenharmony_ci#if GFX_VER < 8 7280bf215546Sopenharmony_ci#if GFX_VER >= 6 7281bf215546Sopenharmony_ci wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; 7282bf215546Sopenharmony_ci 7283bf215546Sopenharmony_ci struct pipe_framebuffer_state *fb = &ice->state.framebuffer; 
7284bf215546Sopenharmony_ci if (fb->samples > 1) { 7285bf215546Sopenharmony_ci if (cso->cso.multisample) 7286bf215546Sopenharmony_ci wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; 7287bf215546Sopenharmony_ci else 7288bf215546Sopenharmony_ci wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 7289bf215546Sopenharmony_ci 7290bf215546Sopenharmony_ci if (wm_prog_data->persample_dispatch) 7291bf215546Sopenharmony_ci wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 7292bf215546Sopenharmony_ci else 7293bf215546Sopenharmony_ci wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL; 7294bf215546Sopenharmony_ci } else { 7295bf215546Sopenharmony_ci wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; 7296bf215546Sopenharmony_ci wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; 7297bf215546Sopenharmony_ci } 7298bf215546Sopenharmony_ci#endif 7299bf215546Sopenharmony_ci 7300bf215546Sopenharmony_ci wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; 7301bf215546Sopenharmony_ci 7302bf215546Sopenharmony_ci if (wm_prog_data->uses_kill || 7303bf215546Sopenharmony_ci ice->state.cso_zsa->cso.alpha_enabled || 7304bf215546Sopenharmony_ci ice->state.cso_blend->cso.alpha_to_coverage || 7305bf215546Sopenharmony_ci (GFX_VER >= 6 && wm_prog_data->uses_omask)) 7306bf215546Sopenharmony_ci wm.PixelShaderKillsPixel = true; 7307bf215546Sopenharmony_ci 7308bf215546Sopenharmony_ci if (has_writeable_rt(ice->state.cso_blend, fs_info) || 7309bf215546Sopenharmony_ci writes_depth || wm.PixelShaderKillsPixel || 7310bf215546Sopenharmony_ci (GFX_VER >= 6 && wm_prog_data->has_side_effects)) 7311bf215546Sopenharmony_ci wm.ThreadDispatchEnable = true; 7312bf215546Sopenharmony_ci 7313bf215546Sopenharmony_ci#if GFX_VER >= 7 7314bf215546Sopenharmony_ci wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; 7315bf215546Sopenharmony_ci wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; 7316bf215546Sopenharmony_ci#else 7317bf215546Sopenharmony_ci if 
(wm_prog_data->base.total_scratch) { 7318bf215546Sopenharmony_ci struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, 7319bf215546Sopenharmony_ci MESA_SHADER_FRAGMENT); 7320bf215546Sopenharmony_ci wm.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11; 7321bf215546Sopenharmony_ci wm.ScratchSpaceBasePointer = rw_bo(bo, 0); 7322bf215546Sopenharmony_ci } 7323bf215546Sopenharmony_ci 7324bf215546Sopenharmony_ci wm.PixelShaderComputedDepth = writes_depth; 7325bf215546Sopenharmony_ci 7326bf215546Sopenharmony_ci#endif 7327bf215546Sopenharmony_ci /* The "UAV access enable" bits are unnecessary on HSW because they only 7328bf215546Sopenharmony_ci * seem to have an effect on the HW-assisted coherency mechanism which we 7329bf215546Sopenharmony_ci * don't need, and the rasterization-related UAV_ONLY flag and the 7330bf215546Sopenharmony_ci * DISPATCH_ENABLE bit can be set independently from it. 7331bf215546Sopenharmony_ci * C.f. gen8_upload_ps_extra(). 
7332bf215546Sopenharmony_ci * 7333bf215546Sopenharmony_ci * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | 7334bf215546Sopenharmony_ci * _NEW_COLOR 7335bf215546Sopenharmony_ci */ 7336bf215546Sopenharmony_ci#if GFX_VERx10 == 75 7337bf215546Sopenharmony_ci if (!(has_writeable_rt(ice->state.cso_blend, fs_info) || writes_depth) && 7338bf215546Sopenharmony_ci wm_prog_data->has_side_effects) 7339bf215546Sopenharmony_ci wm.PSUAVonly = ON; 7340bf215546Sopenharmony_ci#endif 7341bf215546Sopenharmony_ci#endif 7342bf215546Sopenharmony_ci#if GFX_VER >= 7 7343bf215546Sopenharmony_ci /* BRW_NEW_FS_PROG_DATA */ 7344bf215546Sopenharmony_ci if (wm_prog_data->early_fragment_tests) 7345bf215546Sopenharmony_ci wm.EarlyDepthStencilControl = EDSC_PREPS; 7346bf215546Sopenharmony_ci else if (wm_prog_data->has_side_effects) 7347bf215546Sopenharmony_ci wm.EarlyDepthStencilControl = EDSC_PSEXEC; 7348bf215546Sopenharmony_ci#endif 7349bf215546Sopenharmony_ci#if GFX_VER == 8 7350bf215546Sopenharmony_ci /* We could skip this bit if color writes are enabled. 
*/ 7351bf215546Sopenharmony_ci if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill) 7352bf215546Sopenharmony_ci wm.ForceThreadDispatchEnable = ForceON; 7353bf215546Sopenharmony_ci#endif 7354bf215546Sopenharmony_ci }; 7355bf215546Sopenharmony_ci 7356bf215546Sopenharmony_ci#if GFX_VER <= 5 7357bf215546Sopenharmony_ci if (ice->state.global_depth_offset_clamp != cso->cso.offset_clamp) { 7358bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) { 7359bf215546Sopenharmony_ci clamp.GlobalDepthOffsetClamp = cso->cso.offset_clamp; 7360bf215546Sopenharmony_ci } 7361bf215546Sopenharmony_ci ice->state.global_depth_offset_clamp = cso->cso.offset_clamp; 7362bf215546Sopenharmony_ci } 7363bf215546Sopenharmony_ci#endif 7364bf215546Sopenharmony_ci } 7365bf215546Sopenharmony_ci 7366bf215546Sopenharmony_ci#if GFX_VER >= 7 7367bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN7_SBE) { 7368bf215546Sopenharmony_ci crocus_emit_sbe(batch, ice); 7369bf215546Sopenharmony_ci } 7370bf215546Sopenharmony_ci#endif 7371bf215546Sopenharmony_ci 7372bf215546Sopenharmony_ci#if GFX_VER >= 8 7373bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN8_PS_BLEND) { 7374bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT]; 7375bf215546Sopenharmony_ci struct crocus_blend_state *cso_blend = ice->state.cso_blend; 7376bf215546Sopenharmony_ci struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; 7377bf215546Sopenharmony_ci struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; 7378bf215546Sopenharmony_ci const struct shader_info *fs_info = 7379bf215546Sopenharmony_ci crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT); 7380bf215546Sopenharmony_ci uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)]; 7381bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) { 7382bf215546Sopenharmony_ci pb.HasWriteableRT = has_writeable_rt(cso_blend, fs_info); 
7383bf215546Sopenharmony_ci pb.AlphaTestEnable = cso_zsa->cso.alpha_enabled; 7384bf215546Sopenharmony_ci pb.ColorBufferBlendEnable = (cso_blend->blend_enables & 1) && 7385bf215546Sopenharmony_ci (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend); 7386bf215546Sopenharmony_ci } 7387bf215546Sopenharmony_ci crocus_emit_merge(batch, cso_blend->ps_blend, dynamic_pb, 7388bf215546Sopenharmony_ci ARRAY_SIZE(cso_blend->ps_blend)); 7389bf215546Sopenharmony_ci } 7390bf215546Sopenharmony_ci#endif 7391bf215546Sopenharmony_ci 7392bf215546Sopenharmony_ci#if GFX_VER >= 6 7393bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL) { 7394bf215546Sopenharmony_ci 7395bf215546Sopenharmony_ci#if GFX_VER >= 8 7396bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) { 7397bf215546Sopenharmony_ci set_depth_stencil_bits(ice, &wmds); 7398bf215546Sopenharmony_ci } 7399bf215546Sopenharmony_ci#else 7400bf215546Sopenharmony_ci uint32_t ds_offset; 7401bf215546Sopenharmony_ci void *ds_map = stream_state(batch, 7402bf215546Sopenharmony_ci sizeof(uint32_t) * GENX(DEPTH_STENCIL_STATE_length), 7403bf215546Sopenharmony_ci 64, &ds_offset); 7404bf215546Sopenharmony_ci _crocus_pack_state(batch, GENX(DEPTH_STENCIL_STATE), ds_map, ds) { 7405bf215546Sopenharmony_ci set_depth_stencil_bits(ice, &ds); 7406bf215546Sopenharmony_ci } 7407bf215546Sopenharmony_ci 7408bf215546Sopenharmony_ci#if GFX_VER == 6 7409bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { 7410bf215546Sopenharmony_ci ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; 7411bf215546Sopenharmony_ci ptr.DEPTH_STENCIL_STATEChange = true; 7412bf215546Sopenharmony_ci } 7413bf215546Sopenharmony_ci#else 7414bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) { 7415bf215546Sopenharmony_ci ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; 7416bf215546Sopenharmony_ci } 7417bf215546Sopenharmony_ci#endif 
7418bf215546Sopenharmony_ci#endif 7419bf215546Sopenharmony_ci } 7420bf215546Sopenharmony_ci 7421bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN6_SCISSOR_RECT) { 7422bf215546Sopenharmony_ci /* Align to 64-byte boundary as per anv. */ 7423bf215546Sopenharmony_ci uint32_t scissor_offset; 7424bf215546Sopenharmony_ci struct pipe_scissor_state *scissor_map = (void *) 7425bf215546Sopenharmony_ci stream_state(batch, sizeof(struct pipe_scissor_state) * ice->state.num_viewports, 7426bf215546Sopenharmony_ci 64, &scissor_offset); 7427bf215546Sopenharmony_ci for (int i = 0; i < ice->state.num_viewports; i++) { 7428bf215546Sopenharmony_ci struct pipe_scissor_state scissor; 7429bf215546Sopenharmony_ci crocus_fill_scissor_rect(ice, i, &scissor); 7430bf215546Sopenharmony_ci scissor_map[i] = scissor; 7431bf215546Sopenharmony_ci } 7432bf215546Sopenharmony_ci 7433bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { 7434bf215546Sopenharmony_ci ptr.ScissorRectPointer = scissor_offset; 7435bf215546Sopenharmony_ci } 7436bf215546Sopenharmony_ci } 7437bf215546Sopenharmony_ci#endif 7438bf215546Sopenharmony_ci 7439bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_DEPTH_BUFFER) { 7440bf215546Sopenharmony_ci struct isl_device *isl_dev = &batch->screen->isl_dev; 7441bf215546Sopenharmony_ci#if GFX_VER >= 6 7442bf215546Sopenharmony_ci crocus_emit_depth_stall_flushes(batch); 7443bf215546Sopenharmony_ci#endif 7444bf215546Sopenharmony_ci void *batch_ptr; 7445bf215546Sopenharmony_ci struct crocus_resource *zres, *sres; 7446bf215546Sopenharmony_ci struct pipe_framebuffer_state *cso = &ice->state.framebuffer; 7447bf215546Sopenharmony_ci batch_ptr = crocus_get_command_space(batch, isl_dev->ds.size); 7448bf215546Sopenharmony_ci 7449bf215546Sopenharmony_ci struct isl_view view = { 7450bf215546Sopenharmony_ci .base_level = 0, 7451bf215546Sopenharmony_ci .levels = 1, 7452bf215546Sopenharmony_ci .base_array_layer = 0, 7453bf215546Sopenharmony_ci .array_len = 1, 
7454bf215546Sopenharmony_ci .swizzle = ISL_SWIZZLE_IDENTITY, 7455bf215546Sopenharmony_ci }; 7456bf215546Sopenharmony_ci struct isl_depth_stencil_hiz_emit_info info = { 7457bf215546Sopenharmony_ci .view = &view, 7458bf215546Sopenharmony_ci .mocs = crocus_mocs(NULL, isl_dev), 7459bf215546Sopenharmony_ci }; 7460bf215546Sopenharmony_ci 7461bf215546Sopenharmony_ci if (cso->zsbuf) { 7462bf215546Sopenharmony_ci crocus_get_depth_stencil_resources(&batch->screen->devinfo, cso->zsbuf->texture, &zres, &sres); 7463bf215546Sopenharmony_ci struct crocus_surface *zsbuf = (struct crocus_surface *)cso->zsbuf; 7464bf215546Sopenharmony_ci if (zsbuf->align_res) { 7465bf215546Sopenharmony_ci zres = (struct crocus_resource *)zsbuf->align_res; 7466bf215546Sopenharmony_ci } 7467bf215546Sopenharmony_ci view.base_level = cso->zsbuf->u.tex.level; 7468bf215546Sopenharmony_ci view.base_array_layer = cso->zsbuf->u.tex.first_layer; 7469bf215546Sopenharmony_ci view.array_len = cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1; 7470bf215546Sopenharmony_ci 7471bf215546Sopenharmony_ci if (zres) { 7472bf215546Sopenharmony_ci view.usage |= ISL_SURF_USAGE_DEPTH_BIT; 7473bf215546Sopenharmony_ci 7474bf215546Sopenharmony_ci info.depth_surf = &zres->surf; 7475bf215546Sopenharmony_ci info.depth_address = crocus_command_reloc(batch, 7476bf215546Sopenharmony_ci (batch_ptr - batch->command.map) + isl_dev->ds.depth_offset, 7477bf215546Sopenharmony_ci zres->bo, 0, RELOC_32BIT); 7478bf215546Sopenharmony_ci 7479bf215546Sopenharmony_ci info.mocs = crocus_mocs(zres->bo, isl_dev); 7480bf215546Sopenharmony_ci view.format = zres->surf.format; 7481bf215546Sopenharmony_ci 7482bf215546Sopenharmony_ci if (crocus_resource_level_has_hiz(zres, view.base_level)) { 7483bf215546Sopenharmony_ci info.hiz_usage = zres->aux.usage; 7484bf215546Sopenharmony_ci info.hiz_surf = &zres->aux.surf; 7485bf215546Sopenharmony_ci uint64_t hiz_offset = 0; 7486bf215546Sopenharmony_ci 7487bf215546Sopenharmony_ci#if GFX_VER == 6 
7488bf215546Sopenharmony_ci /* HiZ surfaces on Sandy Bridge technically don't support 7489bf215546Sopenharmony_ci * mip-mapping. However, we can fake it by offsetting to the 7490bf215546Sopenharmony_ci * first slice of LOD0 in the HiZ surface. 7491bf215546Sopenharmony_ci */ 7492bf215546Sopenharmony_ci isl_surf_get_image_offset_B_tile_sa(&zres->aux.surf, 7493bf215546Sopenharmony_ci view.base_level, 0, 0, 7494bf215546Sopenharmony_ci &hiz_offset, NULL, NULL); 7495bf215546Sopenharmony_ci#endif 7496bf215546Sopenharmony_ci info.hiz_address = crocus_command_reloc(batch, 7497bf215546Sopenharmony_ci (batch_ptr - batch->command.map) + isl_dev->ds.hiz_offset, 7498bf215546Sopenharmony_ci zres->aux.bo, zres->aux.offset + hiz_offset, 7499bf215546Sopenharmony_ci RELOC_32BIT); 7500bf215546Sopenharmony_ci info.depth_clear_value = crocus_resource_get_clear_color(zres).f32[0]; 7501bf215546Sopenharmony_ci } 7502bf215546Sopenharmony_ci } 7503bf215546Sopenharmony_ci 7504bf215546Sopenharmony_ci#if GFX_VER >= 6 7505bf215546Sopenharmony_ci if (sres) { 7506bf215546Sopenharmony_ci view.usage |= ISL_SURF_USAGE_STENCIL_BIT; 7507bf215546Sopenharmony_ci info.stencil_aux_usage = sres->aux.usage; 7508bf215546Sopenharmony_ci info.stencil_surf = &sres->surf; 7509bf215546Sopenharmony_ci 7510bf215546Sopenharmony_ci uint64_t stencil_offset = 0; 7511bf215546Sopenharmony_ci#if GFX_VER == 6 7512bf215546Sopenharmony_ci /* Stencil surfaces on Sandy Bridge technically don't support 7513bf215546Sopenharmony_ci * mip-mapping. However, we can fake it by offsetting to the 7514bf215546Sopenharmony_ci * first slice of LOD0 in the stencil surface. 
7515bf215546Sopenharmony_ci */ 7516bf215546Sopenharmony_ci isl_surf_get_image_offset_B_tile_sa(&sres->surf, 7517bf215546Sopenharmony_ci view.base_level, 0, 0, 7518bf215546Sopenharmony_ci &stencil_offset, NULL, NULL); 7519bf215546Sopenharmony_ci#endif 7520bf215546Sopenharmony_ci 7521bf215546Sopenharmony_ci info.stencil_address = crocus_command_reloc(batch, 7522bf215546Sopenharmony_ci (batch_ptr - batch->command.map) + isl_dev->ds.stencil_offset, 7523bf215546Sopenharmony_ci sres->bo, stencil_offset, RELOC_32BIT); 7524bf215546Sopenharmony_ci if (!zres) { 7525bf215546Sopenharmony_ci view.format = sres->surf.format; 7526bf215546Sopenharmony_ci info.mocs = crocus_mocs(sres->bo, isl_dev); 7527bf215546Sopenharmony_ci } 7528bf215546Sopenharmony_ci } 7529bf215546Sopenharmony_ci#endif 7530bf215546Sopenharmony_ci } 7531bf215546Sopenharmony_ci isl_emit_depth_stencil_hiz_s(isl_dev, batch_ptr, &info); 7532bf215546Sopenharmony_ci } 7533bf215546Sopenharmony_ci 7534bf215546Sopenharmony_ci /* TODO: Disable emitting this until something uses a stipple. 
*/ 7535bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_POLYGON_STIPPLE) { 7536bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { 7537bf215546Sopenharmony_ci for (int i = 0; i < 32; i++) { 7538bf215546Sopenharmony_ci poly.PatternRow[i] = ice->state.poly_stipple.stipple[i]; 7539bf215546Sopenharmony_ci } 7540bf215546Sopenharmony_ci } 7541bf215546Sopenharmony_ci } 7542bf215546Sopenharmony_ci 7543bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_LINE_STIPPLE) { 7544bf215546Sopenharmony_ci struct crocus_rasterizer_state *cso = ice->state.cso_rast; 7545bf215546Sopenharmony_ci crocus_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple)); 7546bf215546Sopenharmony_ci } 7547bf215546Sopenharmony_ci 7548bf215546Sopenharmony_ci#if GFX_VER >= 8 7549bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN8_VF_TOPOLOGY) { 7550bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { 7551bf215546Sopenharmony_ci topo.PrimitiveTopologyType = 7552bf215546Sopenharmony_ci translate_prim_type(draw->mode, ice->state.patch_vertices); 7553bf215546Sopenharmony_ci } 7554bf215546Sopenharmony_ci } 7555bf215546Sopenharmony_ci#endif 7556bf215546Sopenharmony_ci 7557bf215546Sopenharmony_ci#if GFX_VER <= 5 7558bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN5_PIPELINED_POINTERS) { 7559bf215546Sopenharmony_ci upload_pipelined_state_pointers(batch, ice->shaders.ff_gs_prog ? 
true : false, ice->shaders.gs_offset, 7560bf215546Sopenharmony_ci ice->shaders.vs_offset, ice->shaders.sf_offset, 7561bf215546Sopenharmony_ci ice->shaders.clip_offset, ice->shaders.wm_offset, ice->shaders.cc_offset); 7562bf215546Sopenharmony_ci crocus_upload_urb_fence(batch); 7563bf215546Sopenharmony_ci 7564bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(CS_URB_STATE), cs) { 7565bf215546Sopenharmony_ci cs.NumberofURBEntries = ice->urb.nr_cs_entries; 7566bf215546Sopenharmony_ci cs.URBEntryAllocationSize = ice->urb.csize - 1; 7567bf215546Sopenharmony_ci } 7568bf215546Sopenharmony_ci dirty |= CROCUS_DIRTY_GEN4_CURBE; 7569bf215546Sopenharmony_ci } 7570bf215546Sopenharmony_ci#endif 7571bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_DRAWING_RECTANGLE) { 7572bf215546Sopenharmony_ci struct pipe_framebuffer_state *fb = &ice->state.framebuffer; 7573bf215546Sopenharmony_ci if (fb->width && fb->height) { 7574bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 7575bf215546Sopenharmony_ci rect.ClippedDrawingRectangleXMax = fb->width - 1; 7576bf215546Sopenharmony_ci rect.ClippedDrawingRectangleYMax = fb->height - 1; 7577bf215546Sopenharmony_ci } 7578bf215546Sopenharmony_ci } 7579bf215546Sopenharmony_ci } 7580bf215546Sopenharmony_ci 7581bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_VERTEX_BUFFERS) { 7582bf215546Sopenharmony_ci const uint32_t user_count = util_bitcount(ice->state.bound_vertex_buffers); 7583bf215546Sopenharmony_ci const uint32_t count = user_count + 7584bf215546Sopenharmony_ci ice->state.vs_uses_draw_params + ice->state.vs_uses_derived_draw_params; 7585bf215546Sopenharmony_ci uint32_t dynamic_bound = ice->state.bound_vertex_buffers; 7586bf215546Sopenharmony_ci 7587bf215546Sopenharmony_ci if (count) { 7588bf215546Sopenharmony_ci const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); 7589bf215546Sopenharmony_ci 7590bf215546Sopenharmony_ci uint32_t *map = 7591bf215546Sopenharmony_ci crocus_get_command_space(batch, 4 * 
(1 + vb_dwords * count)); 7592bf215546Sopenharmony_ci _crocus_pack_command(batch, GENX(3DSTATE_VERTEX_BUFFERS), map, vb) { 7593bf215546Sopenharmony_ci vb.DWordLength = (vb_dwords * count + 1) - 2; 7594bf215546Sopenharmony_ci } 7595bf215546Sopenharmony_ci map += 1; 7596bf215546Sopenharmony_ci 7597bf215546Sopenharmony_ci uint32_t bound = dynamic_bound; 7598bf215546Sopenharmony_ci int i; 7599bf215546Sopenharmony_ci while (bound) { 7600bf215546Sopenharmony_ci i = u_bit_scan(&bound); 7601bf215546Sopenharmony_ci struct pipe_vertex_buffer *buf = &ice->state.vertex_buffers[i]; 7602bf215546Sopenharmony_ci struct crocus_bo *bo = crocus_resource_bo(buf->buffer.resource); 7603bf215546Sopenharmony_ci uint32_t step_rate = ice->state.cso_vertex_elements->step_rate[i]; 7604bf215546Sopenharmony_ci 7605bf215546Sopenharmony_ci emit_vertex_buffer_state(batch, i, bo, 7606bf215546Sopenharmony_ci buf->buffer_offset, 7607bf215546Sopenharmony_ci ice->state.vb_end[i], 7608bf215546Sopenharmony_ci buf->stride, 7609bf215546Sopenharmony_ci step_rate, 7610bf215546Sopenharmony_ci &map); 7611bf215546Sopenharmony_ci } 7612bf215546Sopenharmony_ci i = user_count; 7613bf215546Sopenharmony_ci if (ice->state.vs_uses_draw_params) { 7614bf215546Sopenharmony_ci struct crocus_resource *res = (struct crocus_resource *)ice->draw.draw_params.res; 7615bf215546Sopenharmony_ci emit_vertex_buffer_state(batch, i++, 7616bf215546Sopenharmony_ci res->bo, 7617bf215546Sopenharmony_ci ice->draw.draw_params.offset, 7618bf215546Sopenharmony_ci ice->draw.draw_params.res->width0, 7619bf215546Sopenharmony_ci 0, 0, &map); 7620bf215546Sopenharmony_ci } 7621bf215546Sopenharmony_ci if (ice->state.vs_uses_derived_draw_params) { 7622bf215546Sopenharmony_ci struct crocus_resource *res = (struct crocus_resource *)ice->draw.derived_draw_params.res; 7623bf215546Sopenharmony_ci emit_vertex_buffer_state(batch, i++, 7624bf215546Sopenharmony_ci res->bo, 7625bf215546Sopenharmony_ci ice->draw.derived_draw_params.offset, 
7626bf215546Sopenharmony_ci ice->draw.derived_draw_params.res->width0, 7627bf215546Sopenharmony_ci 0, 0, &map); 7628bf215546Sopenharmony_ci } 7629bf215546Sopenharmony_ci } 7630bf215546Sopenharmony_ci } 7631bf215546Sopenharmony_ci 7632bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_VERTEX_ELEMENTS) { 7633bf215546Sopenharmony_ci struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements; 7634bf215546Sopenharmony_ci const unsigned entries = MAX2(cso->count, 1); 7635bf215546Sopenharmony_ci if (!(ice->state.vs_needs_sgvs_element || 7636bf215546Sopenharmony_ci ice->state.vs_uses_derived_draw_params || 7637bf215546Sopenharmony_ci ice->state.vs_needs_edge_flag)) { 7638bf215546Sopenharmony_ci crocus_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) * 7639bf215546Sopenharmony_ci (1 + entries * GENX(VERTEX_ELEMENT_STATE_length))); 7640bf215546Sopenharmony_ci } else { 7641bf215546Sopenharmony_ci uint32_t dynamic_ves[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; 7642bf215546Sopenharmony_ci const unsigned dyn_count = cso->count + 7643bf215546Sopenharmony_ci ice->state.vs_needs_sgvs_element + 7644bf215546Sopenharmony_ci ice->state.vs_uses_derived_draw_params; 7645bf215546Sopenharmony_ci 7646bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), 7647bf215546Sopenharmony_ci &dynamic_ves, ve) { 7648bf215546Sopenharmony_ci ve.DWordLength = 7649bf215546Sopenharmony_ci 1 + GENX(VERTEX_ELEMENT_STATE_length) * dyn_count - 2; 7650bf215546Sopenharmony_ci } 7651bf215546Sopenharmony_ci memcpy(&dynamic_ves[1], &cso->vertex_elements[1], 7652bf215546Sopenharmony_ci (cso->count - ice->state.vs_needs_edge_flag) * 7653bf215546Sopenharmony_ci GENX(VERTEX_ELEMENT_STATE_length) * sizeof(uint32_t)); 7654bf215546Sopenharmony_ci uint32_t *ve_pack_dest = 7655bf215546Sopenharmony_ci &dynamic_ves[1 + (cso->count - ice->state.vs_needs_edge_flag) * 7656bf215546Sopenharmony_ci GENX(VERTEX_ELEMENT_STATE_length)]; 7657bf215546Sopenharmony_ci 7658bf215546Sopenharmony_ci 
if (ice->state.vs_needs_sgvs_element) { 7659bf215546Sopenharmony_ci uint32_t base_ctrl = ice->state.vs_uses_draw_params ? 7660bf215546Sopenharmony_ci VFCOMP_STORE_SRC : VFCOMP_STORE_0; 7661bf215546Sopenharmony_ci crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 7662bf215546Sopenharmony_ci ve.Valid = true; 7663bf215546Sopenharmony_ci ve.VertexBufferIndex = 7664bf215546Sopenharmony_ci util_bitcount64(ice->state.bound_vertex_buffers); 7665bf215546Sopenharmony_ci ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT; 7666bf215546Sopenharmony_ci ve.Component0Control = base_ctrl; 7667bf215546Sopenharmony_ci ve.Component1Control = base_ctrl; 7668bf215546Sopenharmony_ci#if GFX_VER < 8 7669bf215546Sopenharmony_ci ve.Component2Control = ice->state.vs_uses_vertexid ? VFCOMP_STORE_VID : VFCOMP_STORE_0; 7670bf215546Sopenharmony_ci ve.Component3Control = ice->state.vs_uses_instanceid ? VFCOMP_STORE_IID : VFCOMP_STORE_0; 7671bf215546Sopenharmony_ci#else 7672bf215546Sopenharmony_ci ve.Component2Control = VFCOMP_STORE_0; 7673bf215546Sopenharmony_ci ve.Component3Control = VFCOMP_STORE_0; 7674bf215546Sopenharmony_ci#endif 7675bf215546Sopenharmony_ci#if GFX_VER < 5 7676bf215546Sopenharmony_ci ve.DestinationElementOffset = cso->count * 4; 7677bf215546Sopenharmony_ci#endif 7678bf215546Sopenharmony_ci } 7679bf215546Sopenharmony_ci ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 7680bf215546Sopenharmony_ci } 7681bf215546Sopenharmony_ci if (ice->state.vs_uses_derived_draw_params) { 7682bf215546Sopenharmony_ci crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { 7683bf215546Sopenharmony_ci ve.Valid = true; 7684bf215546Sopenharmony_ci ve.VertexBufferIndex = 7685bf215546Sopenharmony_ci util_bitcount64(ice->state.bound_vertex_buffers) + 7686bf215546Sopenharmony_ci ice->state.vs_uses_draw_params; 7687bf215546Sopenharmony_ci ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT; 7688bf215546Sopenharmony_ci ve.Component0Control = VFCOMP_STORE_SRC; 7689bf215546Sopenharmony_ci 
ve.Component1Control = VFCOMP_STORE_SRC; 7690bf215546Sopenharmony_ci ve.Component2Control = VFCOMP_STORE_0; 7691bf215546Sopenharmony_ci ve.Component3Control = VFCOMP_STORE_0; 7692bf215546Sopenharmony_ci#if GFX_VER < 5 7693bf215546Sopenharmony_ci ve.DestinationElementOffset = (cso->count + ice->state.vs_needs_sgvs_element) * 4; 7694bf215546Sopenharmony_ci#endif 7695bf215546Sopenharmony_ci } 7696bf215546Sopenharmony_ci ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); 7697bf215546Sopenharmony_ci } 7698bf215546Sopenharmony_ci if (ice->state.vs_needs_edge_flag) { 7699bf215546Sopenharmony_ci for (int i = 0; i < GENX(VERTEX_ELEMENT_STATE_length); i++) 7700bf215546Sopenharmony_ci ve_pack_dest[i] = cso->edgeflag_ve[i]; 7701bf215546Sopenharmony_ci } 7702bf215546Sopenharmony_ci 7703bf215546Sopenharmony_ci crocus_batch_emit(batch, &dynamic_ves, sizeof(uint32_t) * 7704bf215546Sopenharmony_ci (1 + dyn_count * GENX(VERTEX_ELEMENT_STATE_length))); 7705bf215546Sopenharmony_ci } 7706bf215546Sopenharmony_ci 7707bf215546Sopenharmony_ci#if GFX_VER == 8 7708bf215546Sopenharmony_ci if (!ice->state.vs_needs_edge_flag) { 7709bf215546Sopenharmony_ci crocus_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) * 7710bf215546Sopenharmony_ci entries * GENX(3DSTATE_VF_INSTANCING_length)); 7711bf215546Sopenharmony_ci } else { 7712bf215546Sopenharmony_ci assert(cso->count > 0); 7713bf215546Sopenharmony_ci const unsigned edgeflag_index = cso->count - 1; 7714bf215546Sopenharmony_ci uint32_t dynamic_vfi[33 * GENX(3DSTATE_VF_INSTANCING_length)]; 7715bf215546Sopenharmony_ci memcpy(&dynamic_vfi[0], cso->vf_instancing, edgeflag_index * 7716bf215546Sopenharmony_ci GENX(3DSTATE_VF_INSTANCING_length) * sizeof(uint32_t)); 7717bf215546Sopenharmony_ci 7718bf215546Sopenharmony_ci uint32_t *vfi_pack_dest = &dynamic_vfi[0] + 7719bf215546Sopenharmony_ci edgeflag_index * GENX(3DSTATE_VF_INSTANCING_length); 7720bf215546Sopenharmony_ci crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { 
7721bf215546Sopenharmony_ci vi.VertexElementIndex = edgeflag_index + 7722bf215546Sopenharmony_ci ice->state.vs_needs_sgvs_element + 7723bf215546Sopenharmony_ci ice->state.vs_uses_derived_draw_params; 7724bf215546Sopenharmony_ci } 7725bf215546Sopenharmony_ci for (int i = 0; i < GENX(3DSTATE_VF_INSTANCING_length); i++) 7726bf215546Sopenharmony_ci vfi_pack_dest[i] |= cso->edgeflag_vfi[i]; 7727bf215546Sopenharmony_ci 7728bf215546Sopenharmony_ci crocus_batch_emit(batch, &dynamic_vfi[0], sizeof(uint32_t) * 7729bf215546Sopenharmony_ci entries * GENX(3DSTATE_VF_INSTANCING_length)); 7730bf215546Sopenharmony_ci } 7731bf215546Sopenharmony_ci#endif 7732bf215546Sopenharmony_ci } 7733bf215546Sopenharmony_ci 7734bf215546Sopenharmony_ci#if GFX_VER == 8 7735bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN8_VF_SGVS) { 7736bf215546Sopenharmony_ci const struct brw_vs_prog_data *vs_prog_data = (void *) 7737bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data; 7738bf215546Sopenharmony_ci struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements; 7739bf215546Sopenharmony_ci 7740bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgv) { 7741bf215546Sopenharmony_ci if (vs_prog_data->uses_vertexid) { 7742bf215546Sopenharmony_ci sgv.VertexIDEnable = true; 7743bf215546Sopenharmony_ci sgv.VertexIDComponentNumber = 2; 7744bf215546Sopenharmony_ci sgv.VertexIDElementOffset = 7745bf215546Sopenharmony_ci cso->count - ice->state.vs_needs_edge_flag; 7746bf215546Sopenharmony_ci } 7747bf215546Sopenharmony_ci 7748bf215546Sopenharmony_ci if (vs_prog_data->uses_instanceid) { 7749bf215546Sopenharmony_ci sgv.InstanceIDEnable = true; 7750bf215546Sopenharmony_ci sgv.InstanceIDComponentNumber = 3; 7751bf215546Sopenharmony_ci sgv.InstanceIDElementOffset = 7752bf215546Sopenharmony_ci cso->count - ice->state.vs_needs_edge_flag; 7753bf215546Sopenharmony_ci } 7754bf215546Sopenharmony_ci } 7755bf215546Sopenharmony_ci } 7756bf215546Sopenharmony_ci#endif 
7757bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 7758bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN75_VF) { 7759bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_VF), vf) { 7760bf215546Sopenharmony_ci if (draw->primitive_restart) { 7761bf215546Sopenharmony_ci vf.IndexedDrawCutIndexEnable = true; 7762bf215546Sopenharmony_ci vf.CutIndex = draw->restart_index; 7763bf215546Sopenharmony_ci } 7764bf215546Sopenharmony_ci } 7765bf215546Sopenharmony_ci } 7766bf215546Sopenharmony_ci#endif 7767bf215546Sopenharmony_ci 7768bf215546Sopenharmony_ci#if GFX_VER == 8 7769bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN8_PMA_FIX) { 7770bf215546Sopenharmony_ci bool enable = want_pma_fix(ice); 7771bf215546Sopenharmony_ci genX(crocus_update_pma_fix)(ice, batch, enable); 7772bf215546Sopenharmony_ci } 7773bf215546Sopenharmony_ci#endif 7774bf215546Sopenharmony_ci 7775bf215546Sopenharmony_ci#if GFX_VER <= 5 7776bf215546Sopenharmony_ci if (dirty & CROCUS_DIRTY_GEN4_CURBE) { 7777bf215546Sopenharmony_ci gen4_upload_curbe(batch); 7778bf215546Sopenharmony_ci } 7779bf215546Sopenharmony_ci#endif 7780bf215546Sopenharmony_ci} 7781bf215546Sopenharmony_ci 7782bf215546Sopenharmony_cistatic void 7783bf215546Sopenharmony_cicrocus_upload_render_state(struct crocus_context *ice, 7784bf215546Sopenharmony_ci struct crocus_batch *batch, 7785bf215546Sopenharmony_ci const struct pipe_draw_info *draw, 7786bf215546Sopenharmony_ci unsigned drawid_offset, 7787bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 7788bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *sc) 7789bf215546Sopenharmony_ci{ 7790bf215546Sopenharmony_ci#if GFX_VER >= 7 7791bf215546Sopenharmony_ci bool use_predicate = ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT; 7792bf215546Sopenharmony_ci#endif 7793bf215546Sopenharmony_ci 7794bf215546Sopenharmony_ci batch->no_wrap = true; 7795bf215546Sopenharmony_ci batch->contains_draw = true; 7796bf215546Sopenharmony_ci 
7797bf215546Sopenharmony_ci crocus_update_surface_base_address(batch); 7798bf215546Sopenharmony_ci 7799bf215546Sopenharmony_ci crocus_upload_dirty_render_state(ice, batch, draw); 7800bf215546Sopenharmony_ci 7801bf215546Sopenharmony_ci batch->no_wrap = false; 7802bf215546Sopenharmony_ci if (draw->index_size > 0) { 7803bf215546Sopenharmony_ci unsigned offset; 7804bf215546Sopenharmony_ci unsigned size; 7805bf215546Sopenharmony_ci bool emit_index = false; 7806bf215546Sopenharmony_ci 7807bf215546Sopenharmony_ci if (draw->has_user_indices) { 7808bf215546Sopenharmony_ci unsigned start_offset = draw->index_size * sc->start; 7809bf215546Sopenharmony_ci u_upload_data(ice->ctx.stream_uploader, 0, 7810bf215546Sopenharmony_ci sc->count * draw->index_size, 4, 7811bf215546Sopenharmony_ci (char *)draw->index.user + start_offset, 7812bf215546Sopenharmony_ci &offset, &ice->state.index_buffer.res); 7813bf215546Sopenharmony_ci offset -= start_offset; 7814bf215546Sopenharmony_ci size = start_offset + sc->count * draw->index_size; 7815bf215546Sopenharmony_ci emit_index = true; 7816bf215546Sopenharmony_ci } else { 7817bf215546Sopenharmony_ci struct crocus_resource *res = (void *) draw->index.resource; 7818bf215546Sopenharmony_ci 7819bf215546Sopenharmony_ci if (ice->state.index_buffer.res != draw->index.resource) { 7820bf215546Sopenharmony_ci res->bind_history |= PIPE_BIND_INDEX_BUFFER; 7821bf215546Sopenharmony_ci pipe_resource_reference(&ice->state.index_buffer.res, 7822bf215546Sopenharmony_ci draw->index.resource); 7823bf215546Sopenharmony_ci emit_index = true; 7824bf215546Sopenharmony_ci } 7825bf215546Sopenharmony_ci offset = 0; 7826bf215546Sopenharmony_ci size = draw->index.resource->width0; 7827bf215546Sopenharmony_ci } 7828bf215546Sopenharmony_ci 7829bf215546Sopenharmony_ci if (!emit_index && 7830bf215546Sopenharmony_ci (ice->state.index_buffer.size != size || 7831bf215546Sopenharmony_ci ice->state.index_buffer.index_size != draw->index_size 7832bf215546Sopenharmony_ci#if GFX_VERx10 
< 75 7833bf215546Sopenharmony_ci || ice->state.index_buffer.prim_restart != draw->primitive_restart 7834bf215546Sopenharmony_ci#endif 7835bf215546Sopenharmony_ci ) 7836bf215546Sopenharmony_ci ) 7837bf215546Sopenharmony_ci emit_index = true; 7838bf215546Sopenharmony_ci 7839bf215546Sopenharmony_ci if (emit_index) { 7840bf215546Sopenharmony_ci struct crocus_bo *bo = crocus_resource_bo(ice->state.index_buffer.res); 7841bf215546Sopenharmony_ci 7842bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) { 7843bf215546Sopenharmony_ci#if GFX_VERx10 < 75 7844bf215546Sopenharmony_ci ib.CutIndexEnable = draw->primitive_restart; 7845bf215546Sopenharmony_ci#endif 7846bf215546Sopenharmony_ci ib.IndexFormat = draw->index_size >> 1; 7847bf215546Sopenharmony_ci ib.BufferStartingAddress = ro_bo(bo, offset); 7848bf215546Sopenharmony_ci#if GFX_VER >= 8 7849bf215546Sopenharmony_ci ib.BufferSize = bo->size - offset; 7850bf215546Sopenharmony_ci#else 7851bf215546Sopenharmony_ci ib.BufferEndingAddress = ro_bo(bo, offset + size - 1); 7852bf215546Sopenharmony_ci#endif 7853bf215546Sopenharmony_ci#if GFX_VER >= 6 7854bf215546Sopenharmony_ci ib.MOCS = crocus_mocs(bo, &batch->screen->isl_dev); 7855bf215546Sopenharmony_ci#endif 7856bf215546Sopenharmony_ci } 7857bf215546Sopenharmony_ci ice->state.index_buffer.size = size; 7858bf215546Sopenharmony_ci ice->state.index_buffer.offset = offset; 7859bf215546Sopenharmony_ci ice->state.index_buffer.index_size = draw->index_size; 7860bf215546Sopenharmony_ci#if GFX_VERx10 < 75 7861bf215546Sopenharmony_ci ice->state.index_buffer.prim_restart = draw->primitive_restart; 7862bf215546Sopenharmony_ci#endif 7863bf215546Sopenharmony_ci } 7864bf215546Sopenharmony_ci } 7865bf215546Sopenharmony_ci 7866bf215546Sopenharmony_ci#define _3DPRIM_END_OFFSET 0x2420 7867bf215546Sopenharmony_ci#define _3DPRIM_START_VERTEX 0x2430 7868bf215546Sopenharmony_ci#define _3DPRIM_VERTEX_COUNT 0x2434 7869bf215546Sopenharmony_ci#define _3DPRIM_INSTANCE_COUNT 0x2438 
7870bf215546Sopenharmony_ci#define _3DPRIM_START_INSTANCE 0x243C 7871bf215546Sopenharmony_ci#define _3DPRIM_BASE_VERTEX 0x2440 7872bf215546Sopenharmony_ci 7873bf215546Sopenharmony_ci#if GFX_VER >= 7 7874bf215546Sopenharmony_ci if (indirect && !indirect->count_from_stream_output) { 7875bf215546Sopenharmony_ci if (indirect->indirect_draw_count) { 7876bf215546Sopenharmony_ci use_predicate = true; 7877bf215546Sopenharmony_ci 7878bf215546Sopenharmony_ci struct crocus_bo *draw_count_bo = 7879bf215546Sopenharmony_ci crocus_resource_bo(indirect->indirect_draw_count); 7880bf215546Sopenharmony_ci unsigned draw_count_offset = 7881bf215546Sopenharmony_ci indirect->indirect_draw_count_offset; 7882bf215546Sopenharmony_ci 7883bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, 7884bf215546Sopenharmony_ci "ensure indirect draw buffer is flushed", 7885bf215546Sopenharmony_ci PIPE_CONTROL_FLUSH_ENABLE); 7886bf215546Sopenharmony_ci if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) { 7887bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 7888bf215546Sopenharmony_ci struct mi_builder b; 7889bf215546Sopenharmony_ci mi_builder_init(&b, &batch->screen->devinfo, batch); 7890bf215546Sopenharmony_ci 7891bf215546Sopenharmony_ci /* comparison = draw id < draw count */ 7892bf215546Sopenharmony_ci struct mi_value comparison = 7893bf215546Sopenharmony_ci mi_ult(&b, mi_imm(drawid_offset), 7894bf215546Sopenharmony_ci mi_mem32(ro_bo(draw_count_bo, 7895bf215546Sopenharmony_ci draw_count_offset))); 7896bf215546Sopenharmony_ci#if GFX_VER == 8 7897bf215546Sopenharmony_ci /* predicate = comparison & conditional rendering predicate */ 7898bf215546Sopenharmony_ci mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), 7899bf215546Sopenharmony_ci mi_iand(&b, comparison, mi_reg32(CS_GPR(15)))); 7900bf215546Sopenharmony_ci#else 7901bf215546Sopenharmony_ci /* predicate = comparison & conditional rendering predicate */ 7902bf215546Sopenharmony_ci struct mi_value pred = mi_iand(&b, comparison, 
7903bf215546Sopenharmony_ci mi_reg32(CS_GPR(15))); 7904bf215546Sopenharmony_ci 7905bf215546Sopenharmony_ci mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), pred); 7906bf215546Sopenharmony_ci mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0)); 7907bf215546Sopenharmony_ci 7908bf215546Sopenharmony_ci unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | 7909bf215546Sopenharmony_ci MI_PREDICATE_COMBINEOP_SET | 7910bf215546Sopenharmony_ci MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 7911bf215546Sopenharmony_ci 7912bf215546Sopenharmony_ci crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); 7913bf215546Sopenharmony_ci#endif 7914bf215546Sopenharmony_ci#endif 7915bf215546Sopenharmony_ci } else { 7916bf215546Sopenharmony_ci uint32_t mi_predicate; 7917bf215546Sopenharmony_ci 7918bf215546Sopenharmony_ci /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */ 7919bf215546Sopenharmony_ci crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, drawid_offset); 7920bf215546Sopenharmony_ci /* Upload the current draw count from the draw parameters buffer 7921bf215546Sopenharmony_ci * to MI_PREDICATE_SRC0. 
7922bf215546Sopenharmony_ci */ 7923bf215546Sopenharmony_ci crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, 7924bf215546Sopenharmony_ci draw_count_bo, draw_count_offset); 7925bf215546Sopenharmony_ci /* Zero the top 32-bits of MI_PREDICATE_SRC0 */ 7926bf215546Sopenharmony_ci crocus_load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0); 7927bf215546Sopenharmony_ci 7928bf215546Sopenharmony_ci if (drawid_offset == 0) { 7929bf215546Sopenharmony_ci mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | 7930bf215546Sopenharmony_ci MI_PREDICATE_COMBINEOP_SET | 7931bf215546Sopenharmony_ci MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 7932bf215546Sopenharmony_ci } else { 7933bf215546Sopenharmony_ci /* While draw_index < draw_count the predicate's result will be 7934bf215546Sopenharmony_ci * (draw_index == draw_count) ^ TRUE = TRUE 7935bf215546Sopenharmony_ci * When draw_index == draw_count the result is 7936bf215546Sopenharmony_ci * (TRUE) ^ TRUE = FALSE 7937bf215546Sopenharmony_ci * After this all results will be: 7938bf215546Sopenharmony_ci * (FALSE) ^ FALSE = FALSE 7939bf215546Sopenharmony_ci */ 7940bf215546Sopenharmony_ci mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD | 7941bf215546Sopenharmony_ci MI_PREDICATE_COMBINEOP_XOR | 7942bf215546Sopenharmony_ci MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 7943bf215546Sopenharmony_ci } 7944bf215546Sopenharmony_ci crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); 7945bf215546Sopenharmony_ci } 7946bf215546Sopenharmony_ci } 7947bf215546Sopenharmony_ci 7948bf215546Sopenharmony_ci#if GFX_VER >= 7 7949bf215546Sopenharmony_ci struct crocus_bo *bo = crocus_resource_bo(indirect->buffer); 7950bf215546Sopenharmony_ci assert(bo); 7951bf215546Sopenharmony_ci 7952bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 7953bf215546Sopenharmony_ci lrm.RegisterAddress = _3DPRIM_VERTEX_COUNT; 7954bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, indirect->offset + 0); 7955bf215546Sopenharmony_ci } 
7956bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 7957bf215546Sopenharmony_ci lrm.RegisterAddress = _3DPRIM_INSTANCE_COUNT; 7958bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, indirect->offset + 4); 7959bf215546Sopenharmony_ci } 7960bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 7961bf215546Sopenharmony_ci lrm.RegisterAddress = _3DPRIM_START_VERTEX; 7962bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, indirect->offset + 8); 7963bf215546Sopenharmony_ci } 7964bf215546Sopenharmony_ci if (draw->index_size) { 7965bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 7966bf215546Sopenharmony_ci lrm.RegisterAddress = _3DPRIM_BASE_VERTEX; 7967bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12); 7968bf215546Sopenharmony_ci } 7969bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 7970bf215546Sopenharmony_ci lrm.RegisterAddress = _3DPRIM_START_INSTANCE; 7971bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, indirect->offset + 16); 7972bf215546Sopenharmony_ci } 7973bf215546Sopenharmony_ci } else { 7974bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 7975bf215546Sopenharmony_ci lrm.RegisterAddress = _3DPRIM_START_INSTANCE; 7976bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12); 7977bf215546Sopenharmony_ci } 7978bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 7979bf215546Sopenharmony_ci lri.RegisterOffset = _3DPRIM_BASE_VERTEX; 7980bf215546Sopenharmony_ci lri.DataDWord = 0; 7981bf215546Sopenharmony_ci } 7982bf215546Sopenharmony_ci } 7983bf215546Sopenharmony_ci#endif 7984bf215546Sopenharmony_ci } else if (indirect && indirect->count_from_stream_output) { 7985bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 7986bf215546Sopenharmony_ci struct crocus_stream_output_target *so = 7987bf215546Sopenharmony_ci (void *) 
indirect->count_from_stream_output; 7988bf215546Sopenharmony_ci 7989bf215546Sopenharmony_ci /* XXX: Replace with actual cache tracking */ 7990bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, 7991bf215546Sopenharmony_ci "draw count from stream output stall", 7992bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 7993bf215546Sopenharmony_ci 7994bf215546Sopenharmony_ci struct mi_builder b; 7995bf215546Sopenharmony_ci mi_builder_init(&b, &batch->screen->devinfo, batch); 7996bf215546Sopenharmony_ci 7997bf215546Sopenharmony_ci struct crocus_address addr = 7998bf215546Sopenharmony_ci ro_bo(crocus_resource_bo(&so->offset_res->base.b), so->offset_offset); 7999bf215546Sopenharmony_ci struct mi_value offset = 8000bf215546Sopenharmony_ci mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset); 8001bf215546Sopenharmony_ci 8002bf215546Sopenharmony_ci mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), 8003bf215546Sopenharmony_ci mi_udiv32_imm(&b, offset, so->stride)); 8004bf215546Sopenharmony_ci 8005bf215546Sopenharmony_ci _crocus_emit_lri(batch, _3DPRIM_START_VERTEX, 0); 8006bf215546Sopenharmony_ci _crocus_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0); 8007bf215546Sopenharmony_ci _crocus_emit_lri(batch, _3DPRIM_START_INSTANCE, 0); 8008bf215546Sopenharmony_ci _crocus_emit_lri(batch, _3DPRIM_INSTANCE_COUNT, draw->instance_count); 8009bf215546Sopenharmony_ci#endif 8010bf215546Sopenharmony_ci } 8011bf215546Sopenharmony_ci#else 8012bf215546Sopenharmony_ci assert(!indirect); 8013bf215546Sopenharmony_ci#endif 8014bf215546Sopenharmony_ci 8015bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) { 8016bf215546Sopenharmony_ci prim.VertexAccessType = draw->index_size > 0 ? 
RANDOM : SEQUENTIAL; 8017bf215546Sopenharmony_ci#if GFX_VER >= 7 8018bf215546Sopenharmony_ci prim.PredicateEnable = use_predicate; 8019bf215546Sopenharmony_ci#endif 8020bf215546Sopenharmony_ci 8021bf215546Sopenharmony_ci prim.PrimitiveTopologyType = translate_prim_type(ice->state.prim_mode, ice->state.patch_vertices); 8022bf215546Sopenharmony_ci if (indirect) { 8023bf215546Sopenharmony_ci // XXX Probably have to do something for gen6 here? 8024bf215546Sopenharmony_ci#if GFX_VER >= 7 8025bf215546Sopenharmony_ci prim.IndirectParameterEnable = true; 8026bf215546Sopenharmony_ci#endif 8027bf215546Sopenharmony_ci } else { 8028bf215546Sopenharmony_ci#if GFX_VER >= 5 8029bf215546Sopenharmony_ci prim.StartInstanceLocation = draw->start_instance; 8030bf215546Sopenharmony_ci#endif 8031bf215546Sopenharmony_ci prim.InstanceCount = draw->instance_count; 8032bf215546Sopenharmony_ci prim.VertexCountPerInstance = sc->count; 8033bf215546Sopenharmony_ci 8034bf215546Sopenharmony_ci prim.StartVertexLocation = sc->start; 8035bf215546Sopenharmony_ci 8036bf215546Sopenharmony_ci if (draw->index_size) { 8037bf215546Sopenharmony_ci prim.BaseVertexLocation += sc->index_bias; 8038bf215546Sopenharmony_ci } 8039bf215546Sopenharmony_ci } 8040bf215546Sopenharmony_ci } 8041bf215546Sopenharmony_ci} 8042bf215546Sopenharmony_ci 8043bf215546Sopenharmony_ci#if GFX_VER >= 7 8044bf215546Sopenharmony_ci 8045bf215546Sopenharmony_cistatic void 8046bf215546Sopenharmony_cicrocus_upload_compute_state(struct crocus_context *ice, 8047bf215546Sopenharmony_ci struct crocus_batch *batch, 8048bf215546Sopenharmony_ci const struct pipe_grid_info *grid) 8049bf215546Sopenharmony_ci{ 8050bf215546Sopenharmony_ci const uint64_t stage_dirty = ice->state.stage_dirty; 8051bf215546Sopenharmony_ci struct crocus_screen *screen = batch->screen; 8052bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &screen->devinfo; 8053bf215546Sopenharmony_ci struct crocus_shader_state *shs = 
&ice->state.shaders[MESA_SHADER_COMPUTE]; 8054bf215546Sopenharmony_ci struct crocus_compiled_shader *shader = 8055bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_COMPUTE]; 8056bf215546Sopenharmony_ci struct brw_stage_prog_data *prog_data = shader->prog_data; 8057bf215546Sopenharmony_ci struct brw_cs_prog_data *cs_prog_data = (void *) prog_data; 8058bf215546Sopenharmony_ci const struct brw_cs_dispatch_info dispatch = 8059bf215546Sopenharmony_ci brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block); 8060bf215546Sopenharmony_ci 8061bf215546Sopenharmony_ci crocus_update_surface_base_address(batch); 8062bf215546Sopenharmony_ci if ((stage_dirty & CROCUS_STAGE_DIRTY_CONSTANTS_CS) && shs->sysvals_need_upload) 8063bf215546Sopenharmony_ci upload_sysvals(ice, MESA_SHADER_COMPUTE); 8064bf215546Sopenharmony_ci 8065bf215546Sopenharmony_ci if (stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_CS) { 8066bf215546Sopenharmony_ci crocus_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false); 8067bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset = 8068bf215546Sopenharmony_ci crocus_upload_binding_table(ice, batch, 8069bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_COMPUTE]->surf_offset, 8070bf215546Sopenharmony_ci ice->shaders.prog[MESA_SHADER_COMPUTE]->bt.size_bytes); 8071bf215546Sopenharmony_ci } 8072bf215546Sopenharmony_ci 8073bf215546Sopenharmony_ci if (stage_dirty & CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS) 8074bf215546Sopenharmony_ci crocus_upload_sampler_states(ice, batch, MESA_SHADER_COMPUTE); 8075bf215546Sopenharmony_ci 8076bf215546Sopenharmony_ci if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) || 8077bf215546Sopenharmony_ci cs_prog_data->local_size[0] == 0 /* Variable local group size */) { 8078bf215546Sopenharmony_ci /* The MEDIA_VFE_STATE documentation for Gen8+ says: 8079bf215546Sopenharmony_ci * 8080bf215546Sopenharmony_ci * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless 8081bf215546Sopenharmony_ci * the only 
bits that are changed are scoreboard related: Scoreboard 8082bf215546Sopenharmony_ci * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta. For 8083bf215546Sopenharmony_ci * these scoreboard related states, a MEDIA_STATE_FLUSH is 8084bf215546Sopenharmony_ci * sufficient." 8085bf215546Sopenharmony_ci */ 8086bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, 8087bf215546Sopenharmony_ci "workaround: stall before MEDIA_VFE_STATE", 8088bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 8089bf215546Sopenharmony_ci 8090bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) { 8091bf215546Sopenharmony_ci if (prog_data->total_scratch) { 8092bf215546Sopenharmony_ci struct crocus_bo *bo = 8093bf215546Sopenharmony_ci crocus_get_scratch_space(ice, prog_data->total_scratch, 8094bf215546Sopenharmony_ci MESA_SHADER_COMPUTE); 8095bf215546Sopenharmony_ci#if GFX_VER == 8 8096bf215546Sopenharmony_ci /* Broadwell's Per Thread Scratch Space is in the range [0, 11] 8097bf215546Sopenharmony_ci * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M. 8098bf215546Sopenharmony_ci */ 8099bf215546Sopenharmony_ci vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; 8100bf215546Sopenharmony_ci#elif GFX_VERx10 == 75 8101bf215546Sopenharmony_ci /* Haswell's Per Thread Scratch Space is in the range [0, 10] 8102bf215546Sopenharmony_ci * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M. 8103bf215546Sopenharmony_ci */ 8104bf215546Sopenharmony_ci vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 12; 8105bf215546Sopenharmony_ci#else 8106bf215546Sopenharmony_ci /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB] 8107bf215546Sopenharmony_ci * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB. 
8108bf215546Sopenharmony_ci */ 8109bf215546Sopenharmony_ci vfe.PerThreadScratchSpace = prog_data->total_scratch / 1024 - 1; 8110bf215546Sopenharmony_ci#endif 8111bf215546Sopenharmony_ci vfe.ScratchSpaceBasePointer = rw_bo(bo, 0); 8112bf215546Sopenharmony_ci } 8113bf215546Sopenharmony_ci 8114bf215546Sopenharmony_ci vfe.MaximumNumberofThreads = 8115bf215546Sopenharmony_ci devinfo->max_cs_threads * devinfo->subslice_total - 1; 8116bf215546Sopenharmony_ci vfe.ResetGatewayTimer = 8117bf215546Sopenharmony_ci Resettingrelativetimerandlatchingtheglobaltimestamp; 8118bf215546Sopenharmony_ci vfe.BypassGatewayControl = true; 8119bf215546Sopenharmony_ci#if GFX_VER == 7 8120bf215546Sopenharmony_ci vfe.GPGPUMode = true; 8121bf215546Sopenharmony_ci#endif 8122bf215546Sopenharmony_ci#if GFX_VER == 8 8123bf215546Sopenharmony_ci vfe.BypassGatewayControl = true; 8124bf215546Sopenharmony_ci#endif 8125bf215546Sopenharmony_ci vfe.NumberofURBEntries = GFX_VER == 8 ? 2 : 0; 8126bf215546Sopenharmony_ci vfe.URBEntryAllocationSize = GFX_VER == 8 ? 
2 : 0; 8127bf215546Sopenharmony_ci 8128bf215546Sopenharmony_ci vfe.CURBEAllocationSize = 8129bf215546Sopenharmony_ci ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads + 8130bf215546Sopenharmony_ci cs_prog_data->push.cross_thread.regs, 2); 8131bf215546Sopenharmony_ci } 8132bf215546Sopenharmony_ci } 8133bf215546Sopenharmony_ci 8134bf215546Sopenharmony_ci /* TODO: Combine subgroup-id with cbuf0 so we can push regular uniforms */ 8135bf215546Sopenharmony_ci if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) || 8136bf215546Sopenharmony_ci cs_prog_data->local_size[0] == 0 /* Variable local group size */) { 8137bf215546Sopenharmony_ci uint32_t curbe_data_offset = 0; 8138bf215546Sopenharmony_ci assert(cs_prog_data->push.cross_thread.dwords == 0 && 8139bf215546Sopenharmony_ci cs_prog_data->push.per_thread.dwords == 1 && 8140bf215546Sopenharmony_ci cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); 8141bf215546Sopenharmony_ci const unsigned push_const_size = 8142bf215546Sopenharmony_ci brw_cs_push_const_total_size(cs_prog_data, dispatch.threads); 8143bf215546Sopenharmony_ci uint32_t *curbe_data_map = 8144bf215546Sopenharmony_ci stream_state(batch, 8145bf215546Sopenharmony_ci ALIGN(push_const_size, 64), 64, 8146bf215546Sopenharmony_ci &curbe_data_offset); 8147bf215546Sopenharmony_ci assert(curbe_data_map); 8148bf215546Sopenharmony_ci memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64)); 8149bf215546Sopenharmony_ci crocus_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads, 8150bf215546Sopenharmony_ci curbe_data_map); 8151bf215546Sopenharmony_ci 8152bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) { 8153bf215546Sopenharmony_ci curbe.CURBETotalDataLength = ALIGN(push_const_size, 64); 8154bf215546Sopenharmony_ci curbe.CURBEDataStartAddress = curbe_data_offset; 8155bf215546Sopenharmony_ci } 8156bf215546Sopenharmony_ci } 8157bf215546Sopenharmony_ci 8158bf215546Sopenharmony_ci if (stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS 
| 8159bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_BINDINGS_CS | 8160bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CONSTANTS_CS | 8161bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CS)) { 8162bf215546Sopenharmony_ci uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)]; 8163bf215546Sopenharmony_ci const uint64_t ksp = KSP(ice,shader) + brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size); 8164bf215546Sopenharmony_ci crocus_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) { 8165bf215546Sopenharmony_ci idd.KernelStartPointer = ksp; 8166bf215546Sopenharmony_ci idd.SamplerStatePointer = shs->sampler_offset; 8167bf215546Sopenharmony_ci idd.BindingTablePointer = ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset; 8168bf215546Sopenharmony_ci idd.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31); 8169bf215546Sopenharmony_ci idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads; 8170bf215546Sopenharmony_ci idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs; 8171bf215546Sopenharmony_ci idd.BarrierEnable = cs_prog_data->uses_barrier; 8172bf215546Sopenharmony_ci idd.SharedLocalMemorySize = encode_slm_size(GFX_VER, 8173bf215546Sopenharmony_ci prog_data->total_shared); 8174bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 8175bf215546Sopenharmony_ci idd.CrossThreadConstantDataReadLength = cs_prog_data->push.cross_thread.regs; 8176bf215546Sopenharmony_ci#endif 8177bf215546Sopenharmony_ci } 8178bf215546Sopenharmony_ci 8179bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) { 8180bf215546Sopenharmony_ci load.InterfaceDescriptorTotalLength = 8181bf215546Sopenharmony_ci GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); 8182bf215546Sopenharmony_ci load.InterfaceDescriptorDataStartAddress = 8183bf215546Sopenharmony_ci emit_state(batch, desc, sizeof(desc), 64); 8184bf215546Sopenharmony_ci } 8185bf215546Sopenharmony_ci } 8186bf215546Sopenharmony_ci 8187bf215546Sopenharmony_ci#define GPGPU_DISPATCHDIMX 
0x2500 8188bf215546Sopenharmony_ci#define GPGPU_DISPATCHDIMY 0x2504 8189bf215546Sopenharmony_ci#define GPGPU_DISPATCHDIMZ 0x2508 8190bf215546Sopenharmony_ci 8191bf215546Sopenharmony_ci if (grid->indirect) { 8192bf215546Sopenharmony_ci struct crocus_state_ref *grid_size = &ice->state.grid_size; 8193bf215546Sopenharmony_ci struct crocus_bo *bo = crocus_resource_bo(grid_size->res); 8194bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 8195bf215546Sopenharmony_ci lrm.RegisterAddress = GPGPU_DISPATCHDIMX; 8196bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0); 8197bf215546Sopenharmony_ci } 8198bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 8199bf215546Sopenharmony_ci lrm.RegisterAddress = GPGPU_DISPATCHDIMY; 8200bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4); 8201bf215546Sopenharmony_ci } 8202bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { 8203bf215546Sopenharmony_ci lrm.RegisterAddress = GPGPU_DISPATCHDIMZ; 8204bf215546Sopenharmony_ci lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8); 8205bf215546Sopenharmony_ci } 8206bf215546Sopenharmony_ci 8207bf215546Sopenharmony_ci#if GFX_VER == 7 8208bf215546Sopenharmony_ci /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ 8209bf215546Sopenharmony_ci _crocus_emit_lri(batch, MI_PREDICATE_SRC0 + 4, 0); 8210bf215546Sopenharmony_ci crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, 0); 8211bf215546Sopenharmony_ci 8212bf215546Sopenharmony_ci /* Load compute_dispatch_indirect_x_size into SRC0 */ 8213bf215546Sopenharmony_ci crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 0); 8214bf215546Sopenharmony_ci 8215bf215546Sopenharmony_ci /* predicate = (compute_dispatch_indirect_x_size == 0); */ 8216bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) { 8217bf215546Sopenharmony_ci mip.LoadOperation = LOAD_LOAD; 
8218bf215546Sopenharmony_ci mip.CombineOperation = COMBINE_SET; 8219bf215546Sopenharmony_ci mip.CompareOperation = COMPARE_SRCS_EQUAL; 8220bf215546Sopenharmony_ci }; 8221bf215546Sopenharmony_ci 8222bf215546Sopenharmony_ci /* Load compute_dispatch_indirect_y_size into SRC0 */ 8223bf215546Sopenharmony_ci crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 4); 8224bf215546Sopenharmony_ci 8225bf215546Sopenharmony_ci /* predicate = (compute_dispatch_indirect_y_size == 0); */ 8226bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) { 8227bf215546Sopenharmony_ci mip.LoadOperation = LOAD_LOAD; 8228bf215546Sopenharmony_ci mip.CombineOperation = COMBINE_OR; 8229bf215546Sopenharmony_ci mip.CompareOperation = COMPARE_SRCS_EQUAL; 8230bf215546Sopenharmony_ci }; 8231bf215546Sopenharmony_ci 8232bf215546Sopenharmony_ci /* Load compute_dispatch_indirect_z_size into SRC0 */ 8233bf215546Sopenharmony_ci crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 8); 8234bf215546Sopenharmony_ci 8235bf215546Sopenharmony_ci /* predicate = (compute_dispatch_indirect_z_size == 0); */ 8236bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) { 8237bf215546Sopenharmony_ci mip.LoadOperation = LOAD_LOAD; 8238bf215546Sopenharmony_ci mip.CombineOperation = COMBINE_OR; 8239bf215546Sopenharmony_ci mip.CompareOperation = COMPARE_SRCS_EQUAL; 8240bf215546Sopenharmony_ci }; 8241bf215546Sopenharmony_ci 8242bf215546Sopenharmony_ci /* predicate = !predicate; */ 8243bf215546Sopenharmony_ci#define COMPARE_FALSE 1 8244bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) { 8245bf215546Sopenharmony_ci mip.LoadOperation = LOAD_LOADINV; 8246bf215546Sopenharmony_ci mip.CombineOperation = COMBINE_OR; 8247bf215546Sopenharmony_ci mip.CompareOperation = COMPARE_FALSE; 8248bf215546Sopenharmony_ci } 8249bf215546Sopenharmony_ci#endif 8250bf215546Sopenharmony_ci } 8251bf215546Sopenharmony_ci 8252bf215546Sopenharmony_ci 
crocus_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) { 8253bf215546Sopenharmony_ci ggw.IndirectParameterEnable = grid->indirect != NULL; 8254bf215546Sopenharmony_ci ggw.PredicateEnable = GFX_VER <= 7 && grid->indirect != NULL; 8255bf215546Sopenharmony_ci ggw.SIMDSize = dispatch.simd_size / 16; 8256bf215546Sopenharmony_ci ggw.ThreadDepthCounterMaximum = 0; 8257bf215546Sopenharmony_ci ggw.ThreadHeightCounterMaximum = 0; 8258bf215546Sopenharmony_ci ggw.ThreadWidthCounterMaximum = dispatch.threads - 1; 8259bf215546Sopenharmony_ci ggw.ThreadGroupIDXDimension = grid->grid[0]; 8260bf215546Sopenharmony_ci ggw.ThreadGroupIDYDimension = grid->grid[1]; 8261bf215546Sopenharmony_ci ggw.ThreadGroupIDZDimension = grid->grid[2]; 8262bf215546Sopenharmony_ci ggw.RightExecutionMask = dispatch.right_mask; 8263bf215546Sopenharmony_ci ggw.BottomExecutionMask = 0xffffffff; 8264bf215546Sopenharmony_ci } 8265bf215546Sopenharmony_ci 8266bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf); 8267bf215546Sopenharmony_ci 8268bf215546Sopenharmony_ci batch->contains_draw = true; 8269bf215546Sopenharmony_ci} 8270bf215546Sopenharmony_ci 8271bf215546Sopenharmony_ci#endif /* GFX_VER >= 7 */ 8272bf215546Sopenharmony_ci 8273bf215546Sopenharmony_ci/** 8274bf215546Sopenharmony_ci * State module teardown. 
8275bf215546Sopenharmony_ci */ 8276bf215546Sopenharmony_cistatic void 8277bf215546Sopenharmony_cicrocus_destroy_state(struct crocus_context *ice) 8278bf215546Sopenharmony_ci{ 8279bf215546Sopenharmony_ci pipe_resource_reference(&ice->draw.draw_params.res, NULL); 8280bf215546Sopenharmony_ci pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL); 8281bf215546Sopenharmony_ci 8282bf215546Sopenharmony_ci free(ice->state.genx); 8283bf215546Sopenharmony_ci 8284bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 8285bf215546Sopenharmony_ci pipe_so_target_reference(&ice->state.so_target[i], NULL); 8286bf215546Sopenharmony_ci } 8287bf215546Sopenharmony_ci 8288bf215546Sopenharmony_ci for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) { 8289bf215546Sopenharmony_ci pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL); 8290bf215546Sopenharmony_ci } 8291bf215546Sopenharmony_ci pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL); 8292bf215546Sopenharmony_ci 8293bf215546Sopenharmony_ci for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) { 8294bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[stage]; 8295bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 8296bf215546Sopenharmony_ci pipe_resource_reference(&shs->constbufs[i].buffer, NULL); 8297bf215546Sopenharmony_ci } 8298bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) { 8299bf215546Sopenharmony_ci pipe_resource_reference(&shs->image[i].base.resource, NULL); 8300bf215546Sopenharmony_ci } 8301bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) { 8302bf215546Sopenharmony_ci pipe_resource_reference(&shs->ssbo[i].buffer, NULL); 8303bf215546Sopenharmony_ci } 8304bf215546Sopenharmony_ci for (int i = 0; i < CROCUS_MAX_TEXTURE_SAMPLERS; i++) { 8305bf215546Sopenharmony_ci pipe_sampler_view_reference((struct pipe_sampler_view **) 8306bf215546Sopenharmony_ci &shs->textures[i], NULL); 
8307bf215546Sopenharmony_ci } 8308bf215546Sopenharmony_ci } 8309bf215546Sopenharmony_ci 8310bf215546Sopenharmony_ci for (int i = 0; i < 16; i++) 8311bf215546Sopenharmony_ci pipe_resource_reference(&ice->state.vertex_buffers[i].buffer.resource, NULL); 8312bf215546Sopenharmony_ci pipe_resource_reference(&ice->state.grid_size.res, NULL); 8313bf215546Sopenharmony_ci 8314bf215546Sopenharmony_ci pipe_resource_reference(&ice->state.index_buffer.res, NULL); 8315bf215546Sopenharmony_ci} 8316bf215546Sopenharmony_ci 8317bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */ 8318bf215546Sopenharmony_ci 8319bf215546Sopenharmony_cistatic void 8320bf215546Sopenharmony_cicrocus_rebind_buffer(struct crocus_context *ice, 8321bf215546Sopenharmony_ci struct crocus_resource *res) 8322bf215546Sopenharmony_ci{ 8323bf215546Sopenharmony_ci struct pipe_context *ctx = &ice->ctx; 8324bf215546Sopenharmony_ci 8325bf215546Sopenharmony_ci assert(res->base.b.target == PIPE_BUFFER); 8326bf215546Sopenharmony_ci 8327bf215546Sopenharmony_ci /* Buffers can't be framebuffer attachments, nor display related, 8328bf215546Sopenharmony_ci * and we don't have upstream Clover support. 
8329bf215546Sopenharmony_ci */ 8330bf215546Sopenharmony_ci assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL | 8331bf215546Sopenharmony_ci PIPE_BIND_RENDER_TARGET | 8332bf215546Sopenharmony_ci PIPE_BIND_BLENDABLE | 8333bf215546Sopenharmony_ci PIPE_BIND_DISPLAY_TARGET | 8334bf215546Sopenharmony_ci PIPE_BIND_CURSOR | 8335bf215546Sopenharmony_ci PIPE_BIND_COMPUTE_RESOURCE | 8336bf215546Sopenharmony_ci PIPE_BIND_GLOBAL))); 8337bf215546Sopenharmony_ci 8338bf215546Sopenharmony_ci if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) { 8339bf215546Sopenharmony_ci uint64_t bound_vbs = ice->state.bound_vertex_buffers; 8340bf215546Sopenharmony_ci while (bound_vbs) { 8341bf215546Sopenharmony_ci const int i = u_bit_scan64(&bound_vbs); 8342bf215546Sopenharmony_ci struct pipe_vertex_buffer *buffer = &ice->state.vertex_buffers[i]; 8343bf215546Sopenharmony_ci 8344bf215546Sopenharmony_ci if (!buffer->is_user_buffer && &res->base.b == buffer->buffer.resource) 8345bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS; 8346bf215546Sopenharmony_ci } 8347bf215546Sopenharmony_ci } 8348bf215546Sopenharmony_ci 8349bf215546Sopenharmony_ci if ((res->bind_history & PIPE_BIND_INDEX_BUFFER) && 8350bf215546Sopenharmony_ci ice->state.index_buffer.res) { 8351bf215546Sopenharmony_ci if (res->bo == crocus_resource_bo(ice->state.index_buffer.res)) 8352bf215546Sopenharmony_ci pipe_resource_reference(&ice->state.index_buffer.res, NULL); 8353bf215546Sopenharmony_ci } 8354bf215546Sopenharmony_ci /* There is no need to handle these: 8355bf215546Sopenharmony_ci * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw) 8356bf215546Sopenharmony_ci * - PIPE_BIND_QUERY_BUFFER (no persistent state references) 8357bf215546Sopenharmony_ci */ 8358bf215546Sopenharmony_ci 8359bf215546Sopenharmony_ci if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) { 8360bf215546Sopenharmony_ci /* XXX: be careful about resetting vs appending... 
*/ 8361bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 8362bf215546Sopenharmony_ci if (ice->state.so_target[i] && 8363bf215546Sopenharmony_ci (ice->state.so_target[i]->buffer == &res->base.b)) { 8364bf215546Sopenharmony_ci#if GFX_VER == 6 8365bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS; 8366bf215546Sopenharmony_ci#else 8367bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS; 8368bf215546Sopenharmony_ci#endif 8369bf215546Sopenharmony_ci } 8370bf215546Sopenharmony_ci } 8371bf215546Sopenharmony_ci } 8372bf215546Sopenharmony_ci 8373bf215546Sopenharmony_ci for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) { 8374bf215546Sopenharmony_ci struct crocus_shader_state *shs = &ice->state.shaders[s]; 8375bf215546Sopenharmony_ci enum pipe_shader_type p_stage = stage_to_pipe(s); 8376bf215546Sopenharmony_ci 8377bf215546Sopenharmony_ci if (!(res->bind_stages & (1 << s))) 8378bf215546Sopenharmony_ci continue; 8379bf215546Sopenharmony_ci 8380bf215546Sopenharmony_ci if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) { 8381bf215546Sopenharmony_ci /* Skip constant buffer 0, it's for regular uniforms, not UBOs */ 8382bf215546Sopenharmony_ci uint32_t bound_cbufs = shs->bound_cbufs & ~1u; 8383bf215546Sopenharmony_ci while (bound_cbufs) { 8384bf215546Sopenharmony_ci const int i = u_bit_scan(&bound_cbufs); 8385bf215546Sopenharmony_ci struct pipe_constant_buffer *cbuf = &shs->constbufs[i]; 8386bf215546Sopenharmony_ci 8387bf215546Sopenharmony_ci if (res->bo == crocus_resource_bo(cbuf->buffer)) { 8388bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << s; 8389bf215546Sopenharmony_ci } 8390bf215546Sopenharmony_ci } 8391bf215546Sopenharmony_ci } 8392bf215546Sopenharmony_ci 8393bf215546Sopenharmony_ci if (res->bind_history & PIPE_BIND_SHADER_BUFFER) { 8394bf215546Sopenharmony_ci uint32_t bound_ssbos = shs->bound_ssbos; 8395bf215546Sopenharmony_ci while (bound_ssbos) { 
8396bf215546Sopenharmony_ci const int i = u_bit_scan(&bound_ssbos); 8397bf215546Sopenharmony_ci struct pipe_shader_buffer *ssbo = &shs->ssbo[i]; 8398bf215546Sopenharmony_ci 8399bf215546Sopenharmony_ci if (res->bo == crocus_resource_bo(ssbo->buffer)) { 8400bf215546Sopenharmony_ci struct pipe_shader_buffer buf = { 8401bf215546Sopenharmony_ci .buffer = &res->base.b, 8402bf215546Sopenharmony_ci .buffer_offset = ssbo->buffer_offset, 8403bf215546Sopenharmony_ci .buffer_size = ssbo->buffer_size, 8404bf215546Sopenharmony_ci }; 8405bf215546Sopenharmony_ci crocus_set_shader_buffers(ctx, p_stage, i, 1, &buf, 8406bf215546Sopenharmony_ci (shs->writable_ssbos >> i) & 1); 8407bf215546Sopenharmony_ci } 8408bf215546Sopenharmony_ci } 8409bf215546Sopenharmony_ci } 8410bf215546Sopenharmony_ci 8411bf215546Sopenharmony_ci if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) { 8412bf215546Sopenharmony_ci uint32_t bound_sampler_views = shs->bound_sampler_views; 8413bf215546Sopenharmony_ci while (bound_sampler_views) { 8414bf215546Sopenharmony_ci const int i = u_bit_scan(&bound_sampler_views); 8415bf215546Sopenharmony_ci struct crocus_sampler_view *isv = shs->textures[i]; 8416bf215546Sopenharmony_ci struct crocus_bo *bo = isv->res->bo; 8417bf215546Sopenharmony_ci 8418bf215546Sopenharmony_ci if (res->bo == bo) { 8419bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s; 8420bf215546Sopenharmony_ci } 8421bf215546Sopenharmony_ci } 8422bf215546Sopenharmony_ci } 8423bf215546Sopenharmony_ci 8424bf215546Sopenharmony_ci if (res->bind_history & PIPE_BIND_SHADER_IMAGE) { 8425bf215546Sopenharmony_ci uint32_t bound_image_views = shs->bound_image_views; 8426bf215546Sopenharmony_ci while (bound_image_views) { 8427bf215546Sopenharmony_ci const int i = u_bit_scan(&bound_image_views); 8428bf215546Sopenharmony_ci struct crocus_image_view *iv = &shs->image[i]; 8429bf215546Sopenharmony_ci struct crocus_bo *bo = crocus_resource_bo(iv->base.resource); 8430bf215546Sopenharmony_ci 
8431bf215546Sopenharmony_ci if (res->bo == bo) 8432bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s; 8433bf215546Sopenharmony_ci } 8434bf215546Sopenharmony_ci } 8435bf215546Sopenharmony_ci } 8436bf215546Sopenharmony_ci} 8437bf215546Sopenharmony_ci 8438bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */ 8439bf215546Sopenharmony_ci 8440bf215546Sopenharmony_cistatic unsigned 8441bf215546Sopenharmony_ciflags_to_post_sync_op(uint32_t flags) 8442bf215546Sopenharmony_ci{ 8443bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) 8444bf215546Sopenharmony_ci return WriteImmediateData; 8445bf215546Sopenharmony_ci 8446bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) 8447bf215546Sopenharmony_ci return WritePSDepthCount; 8448bf215546Sopenharmony_ci 8449bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) 8450bf215546Sopenharmony_ci return WriteTimestamp; 8451bf215546Sopenharmony_ci 8452bf215546Sopenharmony_ci return 0; 8453bf215546Sopenharmony_ci} 8454bf215546Sopenharmony_ci 8455bf215546Sopenharmony_ci/* 8456bf215546Sopenharmony_ci * Do the given flags have a Post Sync or LRI Post Sync operation? 8457bf215546Sopenharmony_ci */ 8458bf215546Sopenharmony_cistatic enum pipe_control_flags 8459bf215546Sopenharmony_ciget_post_sync_flags(enum pipe_control_flags flags) 8460bf215546Sopenharmony_ci{ 8461bf215546Sopenharmony_ci flags &= PIPE_CONTROL_WRITE_IMMEDIATE | 8462bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_DEPTH_COUNT | 8463bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_TIMESTAMP | 8464bf215546Sopenharmony_ci PIPE_CONTROL_LRI_POST_SYNC_OP; 8465bf215546Sopenharmony_ci 8466bf215546Sopenharmony_ci /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with 8467bf215546Sopenharmony_ci * "LRI Post Sync Operation". So more than one bit set would be illegal. 
8468bf215546Sopenharmony_ci */ 8469bf215546Sopenharmony_ci assert(util_bitcount(flags) <= 1); 8470bf215546Sopenharmony_ci 8471bf215546Sopenharmony_ci return flags; 8472bf215546Sopenharmony_ci} 8473bf215546Sopenharmony_ci 8474bf215546Sopenharmony_ci#define IS_COMPUTE_PIPELINE(batch) (batch->name == CROCUS_BATCH_COMPUTE) 8475bf215546Sopenharmony_ci 8476bf215546Sopenharmony_ci/** 8477bf215546Sopenharmony_ci * Emit a series of PIPE_CONTROL commands, taking into account any 8478bf215546Sopenharmony_ci * workarounds necessary to actually accomplish the caller's request. 8479bf215546Sopenharmony_ci * 8480bf215546Sopenharmony_ci * Unless otherwise noted, spec quotations in this function come from: 8481bf215546Sopenharmony_ci * 8482bf215546Sopenharmony_ci * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming 8483bf215546Sopenharmony_ci * Restrictions for PIPE_CONTROL. 8484bf215546Sopenharmony_ci * 8485bf215546Sopenharmony_ci * You should not use this function directly. Use the helpers in 8486bf215546Sopenharmony_ci * crocus_pipe_control.c instead, which may split the pipe control further. 
8487bf215546Sopenharmony_ci */ 8488bf215546Sopenharmony_cistatic void 8489bf215546Sopenharmony_cicrocus_emit_raw_pipe_control(struct crocus_batch *batch, 8490bf215546Sopenharmony_ci const char *reason, 8491bf215546Sopenharmony_ci uint32_t flags, 8492bf215546Sopenharmony_ci struct crocus_bo *bo, 8493bf215546Sopenharmony_ci uint32_t offset, 8494bf215546Sopenharmony_ci uint64_t imm) 8495bf215546Sopenharmony_ci{ 8496bf215546Sopenharmony_ci UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo; 8497bf215546Sopenharmony_ci enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); 8498bf215546Sopenharmony_ci UNUSED enum pipe_control_flags non_lri_post_sync_flags = 8499bf215546Sopenharmony_ci post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP; 8500bf215546Sopenharmony_ci 8501bf215546Sopenharmony_ci /* Recursive PIPE_CONTROL workarounds -------------------------------- 8502bf215546Sopenharmony_ci * (http://knowyourmeme.com/memes/xzibit-yo-dawg) 8503bf215546Sopenharmony_ci * 8504bf215546Sopenharmony_ci * We do these first because we want to look at the original operation, 8505bf215546Sopenharmony_ci * rather than any workarounds we set. 8506bf215546Sopenharmony_ci */ 8507bf215546Sopenharmony_ci 8508bf215546Sopenharmony_ci /* "Flush Types" workarounds --------------------------------------------- 8509bf215546Sopenharmony_ci * We do these now because they may add post-sync operations or CS stalls. 8510bf215546Sopenharmony_ci */ 8511bf215546Sopenharmony_ci 8512bf215546Sopenharmony_ci if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { 8513bf215546Sopenharmony_ci /* Hardware workaround: SNB B-Spec says: 8514bf215546Sopenharmony_ci * 8515bf215546Sopenharmony_ci * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush 8516bf215546Sopenharmony_ci * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is 8517bf215546Sopenharmony_ci * required." 
8518bf215546Sopenharmony_ci */ 8519bf215546Sopenharmony_ci crocus_emit_post_sync_nonzero_flush(batch); 8520bf215546Sopenharmony_ci } 8521bf215546Sopenharmony_ci 8522bf215546Sopenharmony_ci#if GFX_VER == 8 8523bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { 8524bf215546Sopenharmony_ci /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate 8525bf215546Sopenharmony_ci * 8526bf215546Sopenharmony_ci * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or 8527bf215546Sopenharmony_ci * 'Write PS Depth Count' or 'Write Timestamp'." 8528bf215546Sopenharmony_ci */ 8529bf215546Sopenharmony_ci if (!bo) { 8530bf215546Sopenharmony_ci flags |= PIPE_CONTROL_WRITE_IMMEDIATE; 8531bf215546Sopenharmony_ci post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; 8532bf215546Sopenharmony_ci non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; 8533bf215546Sopenharmony_ci bo = batch->ice->workaround_bo; 8534bf215546Sopenharmony_ci offset = batch->ice->workaround_offset; 8535bf215546Sopenharmony_ci } 8536bf215546Sopenharmony_ci } 8537bf215546Sopenharmony_ci#endif 8538bf215546Sopenharmony_ci 8539bf215546Sopenharmony_ci#if GFX_VERx10 < 75 8540bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_DEPTH_STALL) { 8541bf215546Sopenharmony_ci /* Project: PRE-HSW / Argument: Depth Stall 8542bf215546Sopenharmony_ci * 8543bf215546Sopenharmony_ci * "The following bits must be clear: 8544bf215546Sopenharmony_ci * - Render Target Cache Flush Enable ([12] of DW1) 8545bf215546Sopenharmony_ci * - Depth Cache Flush Enable ([0] of DW1)" 8546bf215546Sopenharmony_ci */ 8547bf215546Sopenharmony_ci assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | 8548bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH))); 8549bf215546Sopenharmony_ci } 8550bf215546Sopenharmony_ci#endif 8551bf215546Sopenharmony_ci if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) { 8552bf215546Sopenharmony_ci /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable): 
8553bf215546Sopenharmony_ci * 8554bf215546Sopenharmony_ci * "This bit must be DISABLED for operations other than writing 8555bf215546Sopenharmony_ci * PS_DEPTH_COUNT." 8556bf215546Sopenharmony_ci * 8557bf215546Sopenharmony_ci * This seems like nonsense. An Ivybridge workaround requires us to 8558bf215546Sopenharmony_ci * emit a PIPE_CONTROL with a depth stall and write immediate post-sync 8559bf215546Sopenharmony_ci * operation. Gen8+ requires us to emit depth stalls and depth cache 8560bf215546Sopenharmony_ci * flushes together. So, it's hard to imagine this means anything other 8561bf215546Sopenharmony_ci * than "we originally intended this to be used for PS_DEPTH_COUNT". 8562bf215546Sopenharmony_ci * 8563bf215546Sopenharmony_ci * We ignore the supposed restriction and do nothing. 8564bf215546Sopenharmony_ci */ 8565bf215546Sopenharmony_ci } 8566bf215546Sopenharmony_ci 8567bf215546Sopenharmony_ci if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) { 8568bf215546Sopenharmony_ci /* Project: PRE-HSW / Argument: Depth Cache Flush 8569bf215546Sopenharmony_ci * 8570bf215546Sopenharmony_ci * "Depth Stall must be clear ([13] of DW1)." 8571bf215546Sopenharmony_ci */ 8572bf215546Sopenharmony_ci assert(!(flags & PIPE_CONTROL_DEPTH_STALL)); 8573bf215546Sopenharmony_ci } 8574bf215546Sopenharmony_ci 8575bf215546Sopenharmony_ci if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | 8576bf215546Sopenharmony_ci PIPE_CONTROL_STALL_AT_SCOREBOARD)) { 8577bf215546Sopenharmony_ci /* From the PIPE_CONTROL instruction table, bit 12 and bit 1: 8578bf215546Sopenharmony_ci * 8579bf215546Sopenharmony_ci * "This bit must be DISABLED for End-of-pipe (Read) fences, 8580bf215546Sopenharmony_ci * PS_DEPTH_COUNT or TIMESTAMP queries." 8581bf215546Sopenharmony_ci * 8582bf215546Sopenharmony_ci * TODO: Implement end-of-pipe checking. 
8583bf215546Sopenharmony_ci */ 8584bf215546Sopenharmony_ci assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT | 8585bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_TIMESTAMP))); 8586bf215546Sopenharmony_ci } 8587bf215546Sopenharmony_ci 8588bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) { 8589bf215546Sopenharmony_ci /* From the PIPE_CONTROL instruction table, bit 1: 8590bf215546Sopenharmony_ci * 8591bf215546Sopenharmony_ci * "This bit is ignored if Depth Stall Enable is set. 8592bf215546Sopenharmony_ci * Further, the render cache is not flushed even if Write Cache 8593bf215546Sopenharmony_ci * Flush Enable bit is set." 8594bf215546Sopenharmony_ci * 8595bf215546Sopenharmony_ci * We assert that the caller doesn't do this combination, to try and 8596bf215546Sopenharmony_ci * prevent mistakes. It shouldn't hurt the GPU, though. 8597bf215546Sopenharmony_ci * 8598bf215546Sopenharmony_ci * We skip this check on Gen11+ as the "Stall at Pixel Scoreboard" 8599bf215546Sopenharmony_ci * and "Render Target Flush" combo is explicitly required for BTI 8600bf215546Sopenharmony_ci * update workarounds. 8601bf215546Sopenharmony_ci */ 8602bf215546Sopenharmony_ci assert(!(flags & (PIPE_CONTROL_DEPTH_STALL | 8603bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH))); 8604bf215546Sopenharmony_ci } 8605bf215546Sopenharmony_ci 8606bf215546Sopenharmony_ci /* PIPE_CONTROL page workarounds ------------------------------------- */ 8607bf215546Sopenharmony_ci 8608bf215546Sopenharmony_ci if (GFX_VER >= 7 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) { 8609bf215546Sopenharmony_ci /* From the PIPE_CONTROL page itself: 8610bf215546Sopenharmony_ci * 8611bf215546Sopenharmony_ci * "IVB, HSW, BDW 8612bf215546Sopenharmony_ci * Restriction: Pipe_control with CS-stall bit set must be issued 8613bf215546Sopenharmony_ci * before a pipe-control command that has the State Cache 8614bf215546Sopenharmony_ci * Invalidate bit set." 
8615bf215546Sopenharmony_ci */ 8616bf215546Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 8617bf215546Sopenharmony_ci } 8618bf215546Sopenharmony_ci 8619bf215546Sopenharmony_ci if ((GFX_VERx10 == 75)) { 8620bf215546Sopenharmony_ci /* From the PIPE_CONTROL page itself: 8621bf215546Sopenharmony_ci * 8622bf215546Sopenharmony_ci * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation: 8623bf215546Sopenharmony_ci * Prior to programming a PIPECONTROL command with any of the RO 8624bf215546Sopenharmony_ci * cache invalidation bit set, program a PIPECONTROL flush command 8625bf215546Sopenharmony_ci * with “CS stall” bit and “HDC Flush” bit set." 8626bf215546Sopenharmony_ci * 8627bf215546Sopenharmony_ci * TODO: Actually implement this. What's an HDC Flush? 8628bf215546Sopenharmony_ci */ 8629bf215546Sopenharmony_ci } 8630bf215546Sopenharmony_ci 8631bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_FLUSH_LLC) { 8632bf215546Sopenharmony_ci /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC): 8633bf215546Sopenharmony_ci * 8634bf215546Sopenharmony_ci * "Project: ALL 8635bf215546Sopenharmony_ci * SW must always program Post-Sync Operation to "Write Immediate 8636bf215546Sopenharmony_ci * Data" when Flush LLC is set." 8637bf215546Sopenharmony_ci * 8638bf215546Sopenharmony_ci * For now, we just require the caller to do it. 8639bf215546Sopenharmony_ci */ 8640bf215546Sopenharmony_ci assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE); 8641bf215546Sopenharmony_ci } 8642bf215546Sopenharmony_ci 8643bf215546Sopenharmony_ci /* "Post-Sync Operation" workarounds -------------------------------- */ 8644bf215546Sopenharmony_ci 8645bf215546Sopenharmony_ci /* Project: All / Argument: Global Snapshot Count Reset [19] 8646bf215546Sopenharmony_ci * 8647bf215546Sopenharmony_ci * "This bit must not be exercised on any product. 8648bf215546Sopenharmony_ci * Requires stall bit ([20] of DW1) set." 
8649bf215546Sopenharmony_ci * 8650bf215546Sopenharmony_ci * We don't use this, so we just assert that it isn't used. The 8651bf215546Sopenharmony_ci * PIPE_CONTROL instruction page indicates that they intended this 8652bf215546Sopenharmony_ci * as a debug feature and don't think it is useful in production, 8653bf215546Sopenharmony_ci * but it may actually be usable, should we ever want to. 8654bf215546Sopenharmony_ci */ 8655bf215546Sopenharmony_ci assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0); 8656bf215546Sopenharmony_ci 8657bf215546Sopenharmony_ci if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR | 8658bf215546Sopenharmony_ci PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) { 8659bf215546Sopenharmony_ci /* Project: All / Arguments: 8660bf215546Sopenharmony_ci * 8661bf215546Sopenharmony_ci * - Generic Media State Clear [16] 8662bf215546Sopenharmony_ci * - Indirect State Pointers Disable [16] 8663bf215546Sopenharmony_ci * 8664bf215546Sopenharmony_ci * "Requires stall bit ([20] of DW1) set." 8665bf215546Sopenharmony_ci * 8666bf215546Sopenharmony_ci * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media 8667bf215546Sopenharmony_ci * State Clear) says: 8668bf215546Sopenharmony_ci * 8669bf215546Sopenharmony_ci * "PIPECONTROL command with “Command Streamer Stall Enable” must be 8670bf215546Sopenharmony_ci * programmed prior to programming a PIPECONTROL command with "Media 8671bf215546Sopenharmony_ci * State Clear" set in GPGPU mode of operation" 8672bf215546Sopenharmony_ci * 8673bf215546Sopenharmony_ci * This is a subset of the earlier rule, so there's nothing to do. 
8674bf215546Sopenharmony_ci */ 8675bf215546Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 8676bf215546Sopenharmony_ci } 8677bf215546Sopenharmony_ci 8678bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_STORE_DATA_INDEX) { 8679bf215546Sopenharmony_ci /* Project: All / Argument: Store Data Index 8680bf215546Sopenharmony_ci * 8681bf215546Sopenharmony_ci * "Post-Sync Operation ([15:14] of DW1) must be set to something other 8682bf215546Sopenharmony_ci * than '0'." 8683bf215546Sopenharmony_ci * 8684bf215546Sopenharmony_ci * For now, we just assert that the caller does this. We might want to 8685bf215546Sopenharmony_ci * automatically add a write to the workaround BO... 8686bf215546Sopenharmony_ci */ 8687bf215546Sopenharmony_ci assert(non_lri_post_sync_flags != 0); 8688bf215546Sopenharmony_ci } 8689bf215546Sopenharmony_ci 8690bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_SYNC_GFDT) { 8691bf215546Sopenharmony_ci /* Project: All / Argument: Sync GFDT 8692bf215546Sopenharmony_ci * 8693bf215546Sopenharmony_ci * "Post-Sync Operation ([15:14] of DW1) must be set to something other 8694bf215546Sopenharmony_ci * than '0' or 0x2520[13] must be set." 8695bf215546Sopenharmony_ci * 8696bf215546Sopenharmony_ci * For now, we just assert that the caller does this. 8697bf215546Sopenharmony_ci */ 8698bf215546Sopenharmony_ci assert(non_lri_post_sync_flags != 0); 8699bf215546Sopenharmony_ci } 8700bf215546Sopenharmony_ci 8701bf215546Sopenharmony_ci if (GFX_VER >= 6 && GFX_VER < 8 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) { 8702bf215546Sopenharmony_ci /* Project: SNB, IVB, HSW / Argument: TLB inv 8703bf215546Sopenharmony_ci * 8704bf215546Sopenharmony_ci * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1) 8705bf215546Sopenharmony_ci * must be set to something other than '0'." 8706bf215546Sopenharmony_ci * 8707bf215546Sopenharmony_ci * For now, we just assert that the caller does this. 
8708bf215546Sopenharmony_ci */ 8709bf215546Sopenharmony_ci assert(non_lri_post_sync_flags != 0); 8710bf215546Sopenharmony_ci } 8711bf215546Sopenharmony_ci 8712bf215546Sopenharmony_ci if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) { 8713bf215546Sopenharmony_ci /* Project: IVB+ / Argument: TLB inv 8714bf215546Sopenharmony_ci * 8715bf215546Sopenharmony_ci * "Requires stall bit ([20] of DW1) set." 8716bf215546Sopenharmony_ci * 8717bf215546Sopenharmony_ci * Also, from the PIPE_CONTROL instruction table: 8718bf215546Sopenharmony_ci * 8719bf215546Sopenharmony_ci * "Project: SKL+ 8720bf215546Sopenharmony_ci * Post Sync Operation or CS stall must be set to ensure a TLB 8721bf215546Sopenharmony_ci * invalidation occurs. Otherwise no cycle will occur to the TLB 8722bf215546Sopenharmony_ci * cache to invalidate." 8723bf215546Sopenharmony_ci * 8724bf215546Sopenharmony_ci * This is not a subset of the earlier rule, so there's nothing to do. 8725bf215546Sopenharmony_ci */ 8726bf215546Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 8727bf215546Sopenharmony_ci } 8728bf215546Sopenharmony_ci#if GFX_VER == 8 8729bf215546Sopenharmony_ci if (IS_COMPUTE_PIPELINE(batch)) { 8730bf215546Sopenharmony_ci if (post_sync_flags || 8731bf215546Sopenharmony_ci (flags & (PIPE_CONTROL_NOTIFY_ENABLE | 8732bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_STALL | 8733bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH | 8734bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 8735bf215546Sopenharmony_ci PIPE_CONTROL_DATA_CACHE_FLUSH))) { 8736bf215546Sopenharmony_ci /* Project: BDW / Arguments: 8737bf215546Sopenharmony_ci * 8738bf215546Sopenharmony_ci * - LRI Post Sync Operation [23] 8739bf215546Sopenharmony_ci * - Post Sync Op [15:14] 8740bf215546Sopenharmony_ci * - Notify En [8] 8741bf215546Sopenharmony_ci * - Depth Stall [13] 8742bf215546Sopenharmony_ci * - Render Target Cache Flush [12] 8743bf215546Sopenharmony_ci * - Depth Cache Flush [0] 8744bf215546Sopenharmony_ci * - DC Flush 
Enable [5] 8745bf215546Sopenharmony_ci * 8746bf215546Sopenharmony_ci * "Requires stall bit ([20] of DW) set for all GPGPU and Media 8747bf215546Sopenharmony_ci * Workloads." 8748bf215546Sopenharmony_ci * 8749bf215546Sopenharmony_ci * (The docs have separate table rows for each bit, with essentially 8750bf215546Sopenharmony_ci * the same workaround text. We've combined them here.) 8751bf215546Sopenharmony_ci */ 8752bf215546Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 8753bf215546Sopenharmony_ci 8754bf215546Sopenharmony_ci /* Also, from the PIPE_CONTROL instruction table, bit 20: 8755bf215546Sopenharmony_ci * 8756bf215546Sopenharmony_ci * "Project: BDW 8757bf215546Sopenharmony_ci * This bit must be always set when PIPE_CONTROL command is 8758bf215546Sopenharmony_ci * programmed by GPGPU and MEDIA workloads, except for the cases 8759bf215546Sopenharmony_ci * when only Read Only Cache Invalidation bits are set (State 8760bf215546Sopenharmony_ci * Cache Invalidation Enable, Instruction cache Invalidation 8761bf215546Sopenharmony_ci * Enable, Texture Cache Invalidation Enable, Constant Cache 8762bf215546Sopenharmony_ci * Invalidation Enable). This is to WA FFDOP CG issue, this WA 8763bf215546Sopenharmony_ci * need not implemented when FF_DOP_CG is disable via "Fixed 8764bf215546Sopenharmony_ci * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register." 8765bf215546Sopenharmony_ci * 8766bf215546Sopenharmony_ci * It sounds like we could avoid CS stalls in some cases, but we 8767bf215546Sopenharmony_ci * don't currently bother. This list isn't exactly the list above, 8768bf215546Sopenharmony_ci * either... 
8769bf215546Sopenharmony_ci */ 8770bf215546Sopenharmony_ci } 8771bf215546Sopenharmony_ci } 8772bf215546Sopenharmony_ci#endif 8773bf215546Sopenharmony_ci /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: 8774bf215546Sopenharmony_ci * 8775bf215546Sopenharmony_ci * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with 8776bf215546Sopenharmony_ci * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." 8777bf215546Sopenharmony_ci * 8778bf215546Sopenharmony_ci * Note that the kernel does CS stalls between batches, so we only need 8779bf215546Sopenharmony_ci * to count them within a batch. We currently naively count every 4, and 8780bf215546Sopenharmony_ci * don't skip the ones with only read-cache-invalidate bits set. This 8781bf215546Sopenharmony_ci * may or may not be a problem... 8782bf215546Sopenharmony_ci */ 8783bf215546Sopenharmony_ci if (GFX_VER == 7 && !(GFX_VERx10 == 75)) { 8784bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_CS_STALL) { 8785bf215546Sopenharmony_ci /* If we're doing a CS stall, reset the counter and carry on. */ 8786bf215546Sopenharmony_ci batch->pipe_controls_since_last_cs_stall = 0; 8787bf215546Sopenharmony_ci } 8788bf215546Sopenharmony_ci 8789bf215546Sopenharmony_ci /* If this is the fourth pipe control without a CS stall, do one now. */ 8790bf215546Sopenharmony_ci if (++batch->pipe_controls_since_last_cs_stall == 4) { 8791bf215546Sopenharmony_ci batch->pipe_controls_since_last_cs_stall = 0; 8792bf215546Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 8793bf215546Sopenharmony_ci } 8794bf215546Sopenharmony_ci } 8795bf215546Sopenharmony_ci 8796bf215546Sopenharmony_ci /* "Stall" workarounds ---------------------------------------------- 8797bf215546Sopenharmony_ci * These have to come after the earlier ones because we may have added 8798bf215546Sopenharmony_ci * some additional CS stalls above. 
8799bf215546Sopenharmony_ci */ 8800bf215546Sopenharmony_ci 8801bf215546Sopenharmony_ci if (flags & PIPE_CONTROL_CS_STALL) { 8802bf215546Sopenharmony_ci /* Project: PRE-SKL, VLV, CHV 8803bf215546Sopenharmony_ci * 8804bf215546Sopenharmony_ci * "[All Stepping][All SKUs]: 8805bf215546Sopenharmony_ci * 8806bf215546Sopenharmony_ci * One of the following must also be set: 8807bf215546Sopenharmony_ci * 8808bf215546Sopenharmony_ci * - Render Target Cache Flush Enable ([12] of DW1) 8809bf215546Sopenharmony_ci * - Depth Cache Flush Enable ([0] of DW1) 8810bf215546Sopenharmony_ci * - Stall at Pixel Scoreboard ([1] of DW1) 8811bf215546Sopenharmony_ci * - Depth Stall ([13] of DW1) 8812bf215546Sopenharmony_ci * - Post-Sync Operation ([13] of DW1) 8813bf215546Sopenharmony_ci * - DC Flush Enable ([5] of DW1)" 8814bf215546Sopenharmony_ci * 8815bf215546Sopenharmony_ci * If we don't already have one of those bits set, we choose to add 8816bf215546Sopenharmony_ci * "Stall at Pixel Scoreboard". Some of the other bits require a 8817bf215546Sopenharmony_ci * CS stall as a workaround (see above), which would send us into 8818bf215546Sopenharmony_ci * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard" 8819bf215546Sopenharmony_ci * appears to be safe, so we choose that. 
8820bf215546Sopenharmony_ci */ 8821bf215546Sopenharmony_ci const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | 8822bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 8823bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_IMMEDIATE | 8824bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_DEPTH_COUNT | 8825bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_TIMESTAMP | 8826bf215546Sopenharmony_ci PIPE_CONTROL_STALL_AT_SCOREBOARD | 8827bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_STALL | 8828bf215546Sopenharmony_ci PIPE_CONTROL_DATA_CACHE_FLUSH; 8829bf215546Sopenharmony_ci if (!(flags & wa_bits)) 8830bf215546Sopenharmony_ci flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; 8831bf215546Sopenharmony_ci } 8832bf215546Sopenharmony_ci 8833bf215546Sopenharmony_ci /* Emit --------------------------------------------------------------- */ 8834bf215546Sopenharmony_ci 8835bf215546Sopenharmony_ci if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) { 8836bf215546Sopenharmony_ci fprintf(stderr, 8837bf215546Sopenharmony_ci " PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n", 8838bf215546Sopenharmony_ci (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "", 8839bf215546Sopenharmony_ci (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "", 8840bf215546Sopenharmony_ci (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "", 8841bf215546Sopenharmony_ci (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) ? "VF " : "", 8842bf215546Sopenharmony_ci (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ? "RT " : "", 8843bf215546Sopenharmony_ci (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE) ? "Const " : "", 8844bf215546Sopenharmony_ci (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "", 8845bf215546Sopenharmony_ci (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "", 8846bf215546Sopenharmony_ci (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "", 8847bf215546Sopenharmony_ci (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "", 8848bf215546Sopenharmony_ci (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? 
"State " : "", 8849bf215546Sopenharmony_ci (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "", 8850bf215546Sopenharmony_ci (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE) ? "Inst " : "", 8851bf215546Sopenharmony_ci (flags & PIPE_CONTROL_MEDIA_STATE_CLEAR) ? "MediaClear " : "", 8852bf215546Sopenharmony_ci (flags & PIPE_CONTROL_NOTIFY_ENABLE) ? "Notify " : "", 8853bf215546Sopenharmony_ci (flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) ? 8854bf215546Sopenharmony_ci "SnapRes" : "", 8855bf215546Sopenharmony_ci (flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE) ? 8856bf215546Sopenharmony_ci "ISPDis" : "", 8857bf215546Sopenharmony_ci (flags & PIPE_CONTROL_WRITE_IMMEDIATE) ? "WriteImm " : "", 8858bf215546Sopenharmony_ci (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) ? "WriteZCount " : "", 8859bf215546Sopenharmony_ci (flags & PIPE_CONTROL_WRITE_TIMESTAMP) ? "WriteTimestamp " : "", 8860bf215546Sopenharmony_ci imm, reason); 8861bf215546Sopenharmony_ci } 8862bf215546Sopenharmony_ci 8863bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(PIPE_CONTROL), pc) { 8864bf215546Sopenharmony_ci#if GFX_VER >= 7 8865bf215546Sopenharmony_ci pc.LRIPostSyncOperation = NoLRIOperation; 8866bf215546Sopenharmony_ci pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; 8867bf215546Sopenharmony_ci pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; 8868bf215546Sopenharmony_ci#endif 8869bf215546Sopenharmony_ci#if GFX_VER >= 6 8870bf215546Sopenharmony_ci pc.StoreDataIndex = 0; 8871bf215546Sopenharmony_ci pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; 8872bf215546Sopenharmony_ci pc.GlobalSnapshotCountReset = 8873bf215546Sopenharmony_ci flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; 8874bf215546Sopenharmony_ci pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; 8875bf215546Sopenharmony_ci pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; 8876bf215546Sopenharmony_ci pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; 
8877bf215546Sopenharmony_ci pc.RenderTargetCacheFlushEnable = 8878bf215546Sopenharmony_ci flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; 8879bf215546Sopenharmony_ci pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; 8880bf215546Sopenharmony_ci pc.StateCacheInvalidationEnable = 8881bf215546Sopenharmony_ci flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; 8882bf215546Sopenharmony_ci pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; 8883bf215546Sopenharmony_ci pc.ConstantCacheInvalidationEnable = 8884bf215546Sopenharmony_ci flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; 8885bf215546Sopenharmony_ci#else 8886bf215546Sopenharmony_ci pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; 8887bf215546Sopenharmony_ci#endif 8888bf215546Sopenharmony_ci pc.PostSyncOperation = flags_to_post_sync_op(flags); 8889bf215546Sopenharmony_ci pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; 8890bf215546Sopenharmony_ci pc.InstructionCacheInvalidateEnable = 8891bf215546Sopenharmony_ci flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; 8892bf215546Sopenharmony_ci pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; 8893bf215546Sopenharmony_ci#if GFX_VER >= 5 || GFX_VERx10 == 45 8894bf215546Sopenharmony_ci pc.IndirectStatePointersDisable = 8895bf215546Sopenharmony_ci flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; 8896bf215546Sopenharmony_ci#endif 8897bf215546Sopenharmony_ci#if GFX_VER >= 6 8898bf215546Sopenharmony_ci pc.TextureCacheInvalidationEnable = 8899bf215546Sopenharmony_ci flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 8900bf215546Sopenharmony_ci#elif GFX_VER == 5 || GFX_VERx10 == 45 8901bf215546Sopenharmony_ci pc.TextureCacheFlushEnable = 8902bf215546Sopenharmony_ci flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 8903bf215546Sopenharmony_ci#endif 8904bf215546Sopenharmony_ci pc.Address = ggtt_bo(bo, offset); 8905bf215546Sopenharmony_ci if (GFX_VER < 7 && bo) 8906bf215546Sopenharmony_ci pc.DestinationAddressType = DAT_GGTT; 
8907bf215546Sopenharmony_ci pc.ImmediateData = imm; 8908bf215546Sopenharmony_ci } 8909bf215546Sopenharmony_ci} 8910bf215546Sopenharmony_ci 8911bf215546Sopenharmony_ci#if GFX_VER == 6 8912bf215546Sopenharmony_civoid 8913bf215546Sopenharmony_cigenX(crocus_upload_urb)(struct crocus_batch *batch, 8914bf215546Sopenharmony_ci unsigned vs_size, 8915bf215546Sopenharmony_ci bool gs_present, 8916bf215546Sopenharmony_ci unsigned gs_size) 8917bf215546Sopenharmony_ci{ 8918bf215546Sopenharmony_ci struct crocus_context *ice = batch->ice; 8919bf215546Sopenharmony_ci int nr_vs_entries, nr_gs_entries; 8920bf215546Sopenharmony_ci int total_urb_size = ice->urb.size * 1024; /* in bytes */ 8921bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &batch->screen->devinfo; 8922bf215546Sopenharmony_ci 8923bf215546Sopenharmony_ci /* Calculate how many entries fit in each stage's section of the URB */ 8924bf215546Sopenharmony_ci if (gs_present) { 8925bf215546Sopenharmony_ci nr_vs_entries = (total_urb_size/2) / (vs_size * 128); 8926bf215546Sopenharmony_ci nr_gs_entries = (total_urb_size/2) / (gs_size * 128); 8927bf215546Sopenharmony_ci } else { 8928bf215546Sopenharmony_ci nr_vs_entries = total_urb_size / (vs_size * 128); 8929bf215546Sopenharmony_ci nr_gs_entries = 0; 8930bf215546Sopenharmony_ci } 8931bf215546Sopenharmony_ci 8932bf215546Sopenharmony_ci /* Then clamp to the maximum allowed by the hardware */ 8933bf215546Sopenharmony_ci if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX]) 8934bf215546Sopenharmony_ci nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX]; 8935bf215546Sopenharmony_ci 8936bf215546Sopenharmony_ci if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]) 8937bf215546Sopenharmony_ci nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]; 8938bf215546Sopenharmony_ci 8939bf215546Sopenharmony_ci /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). 
*/ 8940bf215546Sopenharmony_ci ice->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); 8941bf215546Sopenharmony_ci ice->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4); 8942bf215546Sopenharmony_ci 8943bf215546Sopenharmony_ci assert(ice->urb.nr_vs_entries >= 8944bf215546Sopenharmony_ci devinfo->urb.min_entries[MESA_SHADER_VERTEX]); 8945bf215546Sopenharmony_ci assert(ice->urb.nr_vs_entries % 4 == 0); 8946bf215546Sopenharmony_ci assert(ice->urb.nr_gs_entries % 4 == 0); 8947bf215546Sopenharmony_ci assert(vs_size <= 5); 8948bf215546Sopenharmony_ci assert(gs_size <= 5); 8949bf215546Sopenharmony_ci 8950bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_URB), urb) { 8951bf215546Sopenharmony_ci urb.VSNumberofURBEntries = ice->urb.nr_vs_entries; 8952bf215546Sopenharmony_ci urb.VSURBEntryAllocationSize = vs_size - 1; 8953bf215546Sopenharmony_ci 8954bf215546Sopenharmony_ci urb.GSNumberofURBEntries = ice->urb.nr_gs_entries; 8955bf215546Sopenharmony_ci urb.GSURBEntryAllocationSize = gs_size - 1; 8956bf215546Sopenharmony_ci }; 8957bf215546Sopenharmony_ci /* From the PRM Volume 2 part 1, section 1.4.7: 8958bf215546Sopenharmony_ci * 8959bf215546Sopenharmony_ci * Because of a urb corruption caused by allocating a previous gsunit’s 8960bf215546Sopenharmony_ci * urb entry to vsunit software is required to send a "GS NULL 8961bf215546Sopenharmony_ci * Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus 8962bf215546Sopenharmony_ci * a dummy DRAW call before any case where VS will be taking over GS URB 8963bf215546Sopenharmony_ci * space. 8964bf215546Sopenharmony_ci * 8965bf215546Sopenharmony_ci * It is not clear exactly what this means ("URB fence" is a command that 8966bf215546Sopenharmony_ci * doesn't exist on Gen6). So for now we just do a full pipeline flush as 8967bf215546Sopenharmony_ci * a workaround. 
8968bf215546Sopenharmony_ci */ 8969bf215546Sopenharmony_ci if (ice->urb.gs_present && !gs_present) 8970bf215546Sopenharmony_ci crocus_emit_mi_flush(batch); 8971bf215546Sopenharmony_ci ice->urb.gs_present = gs_present; 8972bf215546Sopenharmony_ci} 8973bf215546Sopenharmony_ci#endif 8974bf215546Sopenharmony_ci 8975bf215546Sopenharmony_cistatic void 8976bf215546Sopenharmony_cicrocus_lost_genx_state(struct crocus_context *ice, struct crocus_batch *batch) 8977bf215546Sopenharmony_ci{ 8978bf215546Sopenharmony_ci} 8979bf215546Sopenharmony_ci 8980bf215546Sopenharmony_cistatic void 8981bf215546Sopenharmony_cicrocus_emit_mi_report_perf_count(struct crocus_batch *batch, 8982bf215546Sopenharmony_ci struct crocus_bo *bo, 8983bf215546Sopenharmony_ci uint32_t offset_in_bytes, 8984bf215546Sopenharmony_ci uint32_t report_id) 8985bf215546Sopenharmony_ci{ 8986bf215546Sopenharmony_ci#if GFX_VER >= 7 8987bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), mi_rpc) { 8988bf215546Sopenharmony_ci mi_rpc.MemoryAddress = rw_bo(bo, offset_in_bytes); 8989bf215546Sopenharmony_ci mi_rpc.ReportID = report_id; 8990bf215546Sopenharmony_ci } 8991bf215546Sopenharmony_ci#endif 8992bf215546Sopenharmony_ci} 8993bf215546Sopenharmony_ci 8994bf215546Sopenharmony_ci/** 8995bf215546Sopenharmony_ci * From the PRM, Volume 2a: 8996bf215546Sopenharmony_ci * 8997bf215546Sopenharmony_ci * "Indirect State Pointers Disable 8998bf215546Sopenharmony_ci * 8999bf215546Sopenharmony_ci * At the completion of the post-sync operation associated with this pipe 9000bf215546Sopenharmony_ci * control packet, the indirect state pointers in the hardware are 9001bf215546Sopenharmony_ci * considered invalid; the indirect pointers are not saved in the context. 9002bf215546Sopenharmony_ci * If any new indirect state commands are executed in the command stream 9003bf215546Sopenharmony_ci * while the pipe control is pending, the new indirect state commands are 9004bf215546Sopenharmony_ci * preserved. 
9005bf215546Sopenharmony_ci * 9006bf215546Sopenharmony_ci * [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context 9007bf215546Sopenharmony_ci * restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant 9008bf215546Sopenharmony_ci * commands are only considered as Indirect State Pointers. Once ISP is 9009bf215546Sopenharmony_ci * issued in a context, SW must initialize by programming push constant 9010bf215546Sopenharmony_ci * commands for all the shaders (at least to zero length) before attempting 9011bf215546Sopenharmony_ci * any rendering operation for the same context." 9012bf215546Sopenharmony_ci * 9013bf215546Sopenharmony_ci * 3DSTATE_CONSTANT_* packets are restored during a context restore, 9014bf215546Sopenharmony_ci * even though they point to a BO that has been already unreferenced at 9015bf215546Sopenharmony_ci * the end of the previous batch buffer. This has been fine so far since 9016bf215546Sopenharmony_ci * we are protected by these scratch page (every address not covered by 9017bf215546Sopenharmony_ci * a BO should be pointing to the scratch page). But on CNL, it is 9018bf215546Sopenharmony_ci * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_* 9019bf215546Sopenharmony_ci * instruction. 9020bf215546Sopenharmony_ci * 9021bf215546Sopenharmony_ci * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the 9022bf215546Sopenharmony_ci * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a 9023bf215546Sopenharmony_ci * context restore, so the mentioned hang doesn't happen. However, 9024bf215546Sopenharmony_ci * software must program push constant commands for all stages prior to 9025bf215546Sopenharmony_ci * rendering anything, so we flag them as dirty. 
9026bf215546Sopenharmony_ci * 9027bf215546Sopenharmony_ci * Finally, we also make sure to stall at pixel scoreboard to make sure the 9028bf215546Sopenharmony_ci * constants have been loaded into the EUs prior to disable the push constants 9029bf215546Sopenharmony_ci * so that it doesn't hang a previous 3DPRIMITIVE. 9030bf215546Sopenharmony_ci */ 9031bf215546Sopenharmony_ci#if GFX_VER >= 7 9032bf215546Sopenharmony_cistatic void 9033bf215546Sopenharmony_cigen7_emit_isp_disable(struct crocus_batch *batch) 9034bf215546Sopenharmony_ci{ 9035bf215546Sopenharmony_ci crocus_emit_raw_pipe_control(batch, "isp disable", 9036bf215546Sopenharmony_ci PIPE_CONTROL_STALL_AT_SCOREBOARD | 9037bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL, 9038bf215546Sopenharmony_ci NULL, 0, 0); 9039bf215546Sopenharmony_ci crocus_emit_raw_pipe_control(batch, "isp disable", 9040bf215546Sopenharmony_ci PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | 9041bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL, 9042bf215546Sopenharmony_ci NULL, 0, 0); 9043bf215546Sopenharmony_ci 9044bf215546Sopenharmony_ci struct crocus_context *ice = batch->ice; 9045bf215546Sopenharmony_ci ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_CONSTANTS_VS | 9046bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CONSTANTS_TCS | 9047bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CONSTANTS_TES | 9048bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CONSTANTS_GS | 9049bf215546Sopenharmony_ci CROCUS_STAGE_DIRTY_CONSTANTS_FS); 9050bf215546Sopenharmony_ci} 9051bf215546Sopenharmony_ci#endif 9052bf215546Sopenharmony_ci 9053bf215546Sopenharmony_ci#if GFX_VER >= 7 9054bf215546Sopenharmony_cistatic void 9055bf215546Sopenharmony_cicrocus_state_finish_batch(struct crocus_batch *batch) 9056bf215546Sopenharmony_ci{ 9057bf215546Sopenharmony_ci#if GFX_VERx10 == 75 9058bf215546Sopenharmony_ci if (batch->name == CROCUS_BATCH_RENDER) { 9059bf215546Sopenharmony_ci crocus_emit_mi_flush(batch); 9060bf215546Sopenharmony_ci crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), 
ptr) { 9061bf215546Sopenharmony_ci ptr.ColorCalcStatePointer = batch->ice->shaders.cc_offset; 9062bf215546Sopenharmony_ci } 9063bf215546Sopenharmony_ci 9064bf215546Sopenharmony_ci crocus_emit_pipe_control_flush(batch, "hsw wa", PIPE_CONTROL_RENDER_TARGET_FLUSH | 9065bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 9066bf215546Sopenharmony_ci } 9067bf215546Sopenharmony_ci#endif 9068bf215546Sopenharmony_ci gen7_emit_isp_disable(batch); 9069bf215546Sopenharmony_ci} 9070bf215546Sopenharmony_ci#endif 9071bf215546Sopenharmony_ci 9072bf215546Sopenharmony_cistatic void 9073bf215546Sopenharmony_cicrocus_batch_reset_dirty(struct crocus_batch *batch) 9074bf215546Sopenharmony_ci{ 9075bf215546Sopenharmony_ci /* unreference any index buffer so it get reemitted. */ 9076bf215546Sopenharmony_ci pipe_resource_reference(&batch->ice->state.index_buffer.res, NULL); 9077bf215546Sopenharmony_ci 9078bf215546Sopenharmony_ci /* for GEN4/5 need to reemit anything that ends up in the state batch that points to anything in the state batch 9079bf215546Sopenharmony_ci * as the old state batch won't still be available. 
9080bf215546Sopenharmony_ci */ 9081bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER | 9082bf215546Sopenharmony_ci CROCUS_DIRTY_COLOR_CALC_STATE; 9083bf215546Sopenharmony_ci 9084bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS; 9085bf215546Sopenharmony_ci 9086bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS; 9087bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS; 9088bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES; 9089bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS; 9090bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS; 9091bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS; 9092bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS; 9093bf215546Sopenharmony_ci 9094bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS; 9095bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TES; 9096bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS; 9097bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_GS; 9098bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_FS; 9099bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS; 9100bf215546Sopenharmony_ci 9101bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS; 9102bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS; 9103bf215546Sopenharmony_ci batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CS; 9104bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT | 
CROCUS_DIRTY_SF_CL_VIEWPORT; 9105bf215546Sopenharmony_ci 9106bf215546Sopenharmony_ci#if GFX_VER >= 6 9107bf215546Sopenharmony_ci /* SCISSOR_STATE */ 9108bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE; 9109bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT; 9110bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL; 9111bf215546Sopenharmony_ci 9112bf215546Sopenharmony_ci#endif 9113bf215546Sopenharmony_ci#if GFX_VER <= 5 9114bf215546Sopenharmony_ci /* dirty the SF state on gen4/5 */ 9115bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_RASTER; 9116bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE; 9117bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_CLIP; 9118bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_WM; 9119bf215546Sopenharmony_ci#endif 9120bf215546Sopenharmony_ci#if GFX_VER >= 7 9121bf215546Sopenharmony_ci /* Streamout dirty */ 9122bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_STREAMOUT; 9123bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST; 9124bf215546Sopenharmony_ci batch->ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS; 9125bf215546Sopenharmony_ci#endif 9126bf215546Sopenharmony_ci} 9127bf215546Sopenharmony_ci 9128bf215546Sopenharmony_ci#if GFX_VERx10 == 75 9129bf215546Sopenharmony_cistruct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ice) 9130bf215546Sopenharmony_ci{ 9131bf215546Sopenharmony_ci return &ice->state.cso_rast->cso; 9132bf215546Sopenharmony_ci} 9133bf215546Sopenharmony_ci#endif 9134bf215546Sopenharmony_ci 9135bf215546Sopenharmony_ci#if GFX_VER >= 6 9136bf215546Sopenharmony_cistatic void update_so_strides(struct crocus_context *ice, 9137bf215546Sopenharmony_ci uint16_t *strides) 9138bf215546Sopenharmony_ci{ 9139bf215546Sopenharmony_ci for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 9140bf215546Sopenharmony_ci 
struct crocus_stream_output_target *so = (void *)ice->state.so_target[i]; 9141bf215546Sopenharmony_ci if (so) 9142bf215546Sopenharmony_ci so->stride = strides[i] * sizeof(uint32_t); 9143bf215546Sopenharmony_ci } 9144bf215546Sopenharmony_ci} 9145bf215546Sopenharmony_ci#endif 9146bf215546Sopenharmony_ci 9147bf215546Sopenharmony_cistatic void crocus_fill_clamp_mask(const struct crocus_sampler_state *samp, 9148bf215546Sopenharmony_ci int s, 9149bf215546Sopenharmony_ci uint32_t *clamp_mask) 9150bf215546Sopenharmony_ci{ 9151bf215546Sopenharmony_ci#if GFX_VER < 8 9152bf215546Sopenharmony_ci if (samp->pstate.min_img_filter != PIPE_TEX_FILTER_NEAREST && 9153bf215546Sopenharmony_ci samp->pstate.mag_img_filter != PIPE_TEX_FILTER_NEAREST) { 9154bf215546Sopenharmony_ci if (samp->pstate.wrap_s == PIPE_TEX_WRAP_CLAMP) 9155bf215546Sopenharmony_ci clamp_mask[0] |= (1 << s); 9156bf215546Sopenharmony_ci if (samp->pstate.wrap_t == PIPE_TEX_WRAP_CLAMP) 9157bf215546Sopenharmony_ci clamp_mask[1] |= (1 << s); 9158bf215546Sopenharmony_ci if (samp->pstate.wrap_r == PIPE_TEX_WRAP_CLAMP) 9159bf215546Sopenharmony_ci clamp_mask[2] |= (1 << s); 9160bf215546Sopenharmony_ci } 9161bf215546Sopenharmony_ci#endif 9162bf215546Sopenharmony_ci} 9163bf215546Sopenharmony_ci 9164bf215546Sopenharmony_cistatic void 9165bf215546Sopenharmony_cicrocus_set_frontend_noop(struct pipe_context *ctx, bool enable) 9166bf215546Sopenharmony_ci{ 9167bf215546Sopenharmony_ci struct crocus_context *ice = (struct crocus_context *) ctx; 9168bf215546Sopenharmony_ci 9169bf215546Sopenharmony_ci if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_RENDER], enable)) { 9170bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER; 9171bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER; 9172bf215546Sopenharmony_ci } 9173bf215546Sopenharmony_ci 9174bf215546Sopenharmony_ci if (ice->batch_count == 1) 9175bf215546Sopenharmony_ci return; 9176bf215546Sopenharmony_ci 
9177bf215546Sopenharmony_ci if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_COMPUTE], enable)) { 9178bf215546Sopenharmony_ci ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE; 9179bf215546Sopenharmony_ci ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE; 9180bf215546Sopenharmony_ci } 9181bf215546Sopenharmony_ci} 9182bf215546Sopenharmony_ci 9183bf215546Sopenharmony_civoid 9184bf215546Sopenharmony_cigenX(crocus_init_screen_state)(struct crocus_screen *screen) 9185bf215546Sopenharmony_ci{ 9186bf215546Sopenharmony_ci assert(screen->devinfo.verx10 == GFX_VERx10); 9187bf215546Sopenharmony_ci assert(screen->devinfo.ver == GFX_VER); 9188bf215546Sopenharmony_ci screen->vtbl.destroy_state = crocus_destroy_state; 9189bf215546Sopenharmony_ci screen->vtbl.init_render_context = crocus_init_render_context; 9190bf215546Sopenharmony_ci screen->vtbl.upload_render_state = crocus_upload_render_state; 9191bf215546Sopenharmony_ci#if GFX_VER >= 7 9192bf215546Sopenharmony_ci screen->vtbl.init_compute_context = crocus_init_compute_context; 9193bf215546Sopenharmony_ci screen->vtbl.upload_compute_state = crocus_upload_compute_state; 9194bf215546Sopenharmony_ci#endif 9195bf215546Sopenharmony_ci screen->vtbl.emit_raw_pipe_control = crocus_emit_raw_pipe_control; 9196bf215546Sopenharmony_ci screen->vtbl.emit_mi_report_perf_count = crocus_emit_mi_report_perf_count; 9197bf215546Sopenharmony_ci screen->vtbl.rebind_buffer = crocus_rebind_buffer; 9198bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 9199bf215546Sopenharmony_ci screen->vtbl.load_register_reg32 = crocus_load_register_reg32; 9200bf215546Sopenharmony_ci screen->vtbl.load_register_reg64 = crocus_load_register_reg64; 9201bf215546Sopenharmony_ci screen->vtbl.load_register_imm32 = crocus_load_register_imm32; 9202bf215546Sopenharmony_ci screen->vtbl.load_register_imm64 = crocus_load_register_imm64; 9203bf215546Sopenharmony_ci screen->vtbl.store_data_imm32 = crocus_store_data_imm32; 9204bf215546Sopenharmony_ci 
screen->vtbl.store_data_imm64 = crocus_store_data_imm64; 9205bf215546Sopenharmony_ci#endif 9206bf215546Sopenharmony_ci#if GFX_VER >= 7 9207bf215546Sopenharmony_ci screen->vtbl.load_register_mem32 = crocus_load_register_mem32; 9208bf215546Sopenharmony_ci screen->vtbl.load_register_mem64 = crocus_load_register_mem64; 9209bf215546Sopenharmony_ci screen->vtbl.copy_mem_mem = crocus_copy_mem_mem; 9210bf215546Sopenharmony_ci screen->vtbl.create_so_decl_list = crocus_create_so_decl_list; 9211bf215546Sopenharmony_ci#endif 9212bf215546Sopenharmony_ci screen->vtbl.update_surface_base_address = crocus_update_surface_base_address; 9213bf215546Sopenharmony_ci#if GFX_VER >= 6 9214bf215546Sopenharmony_ci screen->vtbl.store_register_mem32 = crocus_store_register_mem32; 9215bf215546Sopenharmony_ci screen->vtbl.store_register_mem64 = crocus_store_register_mem64; 9216bf215546Sopenharmony_ci#endif 9217bf215546Sopenharmony_ci screen->vtbl.populate_vs_key = crocus_populate_vs_key; 9218bf215546Sopenharmony_ci screen->vtbl.populate_tcs_key = crocus_populate_tcs_key; 9219bf215546Sopenharmony_ci screen->vtbl.populate_tes_key = crocus_populate_tes_key; 9220bf215546Sopenharmony_ci screen->vtbl.populate_gs_key = crocus_populate_gs_key; 9221bf215546Sopenharmony_ci screen->vtbl.populate_fs_key = crocus_populate_fs_key; 9222bf215546Sopenharmony_ci screen->vtbl.populate_cs_key = crocus_populate_cs_key; 9223bf215546Sopenharmony_ci screen->vtbl.lost_genx_state = crocus_lost_genx_state; 9224bf215546Sopenharmony_ci#if GFX_VER >= 7 9225bf215546Sopenharmony_ci screen->vtbl.finish_batch = crocus_state_finish_batch; 9226bf215546Sopenharmony_ci#endif 9227bf215546Sopenharmony_ci#if GFX_VER <= 5 9228bf215546Sopenharmony_ci screen->vtbl.upload_urb_fence = crocus_upload_urb_fence; 9229bf215546Sopenharmony_ci screen->vtbl.calculate_urb_fence = crocus_calculate_urb_fence; 9230bf215546Sopenharmony_ci#endif 9231bf215546Sopenharmony_ci screen->vtbl.fill_clamp_mask = crocus_fill_clamp_mask; 
9232bf215546Sopenharmony_ci screen->vtbl.batch_reset_dirty = crocus_batch_reset_dirty; 9233bf215546Sopenharmony_ci screen->vtbl.translate_prim_type = translate_prim_type; 9234bf215546Sopenharmony_ci#if GFX_VER >= 6 9235bf215546Sopenharmony_ci screen->vtbl.update_so_strides = update_so_strides; 9236bf215546Sopenharmony_ci screen->vtbl.get_so_offset = crocus_get_so_offset; 9237bf215546Sopenharmony_ci#endif 9238bf215546Sopenharmony_ci 9239bf215546Sopenharmony_ci genX(crocus_init_blt)(screen); 9240bf215546Sopenharmony_ci} 9241bf215546Sopenharmony_ci 9242bf215546Sopenharmony_civoid 9243bf215546Sopenharmony_cigenX(crocus_init_state)(struct crocus_context *ice) 9244bf215546Sopenharmony_ci{ 9245bf215546Sopenharmony_ci struct pipe_context *ctx = &ice->ctx; 9246bf215546Sopenharmony_ci 9247bf215546Sopenharmony_ci ctx->create_blend_state = crocus_create_blend_state; 9248bf215546Sopenharmony_ci ctx->create_depth_stencil_alpha_state = crocus_create_zsa_state; 9249bf215546Sopenharmony_ci ctx->create_rasterizer_state = crocus_create_rasterizer_state; 9250bf215546Sopenharmony_ci ctx->create_sampler_state = crocus_create_sampler_state; 9251bf215546Sopenharmony_ci ctx->create_sampler_view = crocus_create_sampler_view; 9252bf215546Sopenharmony_ci ctx->create_surface = crocus_create_surface; 9253bf215546Sopenharmony_ci ctx->create_vertex_elements_state = crocus_create_vertex_elements; 9254bf215546Sopenharmony_ci ctx->bind_blend_state = crocus_bind_blend_state; 9255bf215546Sopenharmony_ci ctx->bind_depth_stencil_alpha_state = crocus_bind_zsa_state; 9256bf215546Sopenharmony_ci ctx->bind_sampler_states = crocus_bind_sampler_states; 9257bf215546Sopenharmony_ci ctx->bind_rasterizer_state = crocus_bind_rasterizer_state; 9258bf215546Sopenharmony_ci ctx->bind_vertex_elements_state = crocus_bind_vertex_elements_state; 9259bf215546Sopenharmony_ci ctx->delete_blend_state = crocus_delete_state; 9260bf215546Sopenharmony_ci ctx->delete_depth_stencil_alpha_state = crocus_delete_state; 
9261bf215546Sopenharmony_ci ctx->delete_rasterizer_state = crocus_delete_state; 9262bf215546Sopenharmony_ci ctx->delete_sampler_state = crocus_delete_state; 9263bf215546Sopenharmony_ci ctx->delete_vertex_elements_state = crocus_delete_state; 9264bf215546Sopenharmony_ci ctx->set_blend_color = crocus_set_blend_color; 9265bf215546Sopenharmony_ci ctx->set_clip_state = crocus_set_clip_state; 9266bf215546Sopenharmony_ci ctx->set_constant_buffer = crocus_set_constant_buffer; 9267bf215546Sopenharmony_ci ctx->set_shader_buffers = crocus_set_shader_buffers; 9268bf215546Sopenharmony_ci ctx->set_shader_images = crocus_set_shader_images; 9269bf215546Sopenharmony_ci ctx->set_sampler_views = crocus_set_sampler_views; 9270bf215546Sopenharmony_ci ctx->set_tess_state = crocus_set_tess_state; 9271bf215546Sopenharmony_ci ctx->set_patch_vertices = crocus_set_patch_vertices; 9272bf215546Sopenharmony_ci ctx->set_framebuffer_state = crocus_set_framebuffer_state; 9273bf215546Sopenharmony_ci ctx->set_polygon_stipple = crocus_set_polygon_stipple; 9274bf215546Sopenharmony_ci ctx->set_sample_mask = crocus_set_sample_mask; 9275bf215546Sopenharmony_ci ctx->set_scissor_states = crocus_set_scissor_states; 9276bf215546Sopenharmony_ci ctx->set_stencil_ref = crocus_set_stencil_ref; 9277bf215546Sopenharmony_ci ctx->set_vertex_buffers = crocus_set_vertex_buffers; 9278bf215546Sopenharmony_ci ctx->set_viewport_states = crocus_set_viewport_states; 9279bf215546Sopenharmony_ci ctx->sampler_view_destroy = crocus_sampler_view_destroy; 9280bf215546Sopenharmony_ci ctx->surface_destroy = crocus_surface_destroy; 9281bf215546Sopenharmony_ci ctx->draw_vbo = crocus_draw_vbo; 9282bf215546Sopenharmony_ci ctx->launch_grid = crocus_launch_grid; 9283bf215546Sopenharmony_ci 9284bf215546Sopenharmony_ci ctx->set_frontend_noop = crocus_set_frontend_noop; 9285bf215546Sopenharmony_ci 9286bf215546Sopenharmony_ci#if GFX_VER >= 6 9287bf215546Sopenharmony_ci ctx->create_stream_output_target = crocus_create_stream_output_target; 
9288bf215546Sopenharmony_ci ctx->stream_output_target_destroy = crocus_stream_output_target_destroy; 9289bf215546Sopenharmony_ci ctx->set_stream_output_targets = crocus_set_stream_output_targets; 9290bf215546Sopenharmony_ci#endif 9291bf215546Sopenharmony_ci 9292bf215546Sopenharmony_ci ice->state.dirty = ~0ull; 9293bf215546Sopenharmony_ci ice->state.stage_dirty = ~0ull; 9294bf215546Sopenharmony_ci 9295bf215546Sopenharmony_ci ice->state.statistics_counters_enabled = true; 9296bf215546Sopenharmony_ci 9297bf215546Sopenharmony_ci ice->state.sample_mask = 0xff; 9298bf215546Sopenharmony_ci ice->state.num_viewports = 1; 9299bf215546Sopenharmony_ci ice->state.prim_mode = PIPE_PRIM_MAX; 9300bf215546Sopenharmony_ci ice->state.reduced_prim_mode = PIPE_PRIM_MAX; 9301bf215546Sopenharmony_ci ice->state.genx = calloc(1, sizeof(struct crocus_genx_state)); 9302bf215546Sopenharmony_ci ice->draw.derived_params.drawid = -1; 9303bf215546Sopenharmony_ci 9304bf215546Sopenharmony_ci /* Default all scissor rectangles to be empty regions. */ 9305bf215546Sopenharmony_ci for (int i = 0; i < CROCUS_MAX_VIEWPORTS; i++) { 9306bf215546Sopenharmony_ci ice->state.scissors[i] = (struct pipe_scissor_state) { 9307bf215546Sopenharmony_ci .minx = 1, .maxx = 0, .miny = 1, .maxy = 0, 9308bf215546Sopenharmony_ci }; 9309bf215546Sopenharmony_ci } 9310bf215546Sopenharmony_ci} 9311