1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2017 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included
12bf215546Sopenharmony_ci * in all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci */
22bf215546Sopenharmony_ci
23bf215546Sopenharmony_ci/**
24bf215546Sopenharmony_ci * @file crocus_state.c
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci * ============================= GENXML CODE =============================
27bf215546Sopenharmony_ci *              [This file is compiled once per generation.]
28bf215546Sopenharmony_ci * =======================================================================
29bf215546Sopenharmony_ci *
30bf215546Sopenharmony_ci * This is the main state upload code.
31bf215546Sopenharmony_ci *
32bf215546Sopenharmony_ci * Gallium uses Constant State Objects, or CSOs, for most state.  Large,
33bf215546Sopenharmony_ci * complex, or highly reusable state can be created once, and bound and
34bf215546Sopenharmony_ci * rebound multiple times.  This is modeled with the pipe->create_*_state()
35bf215546Sopenharmony_ci * and pipe->bind_*_state() hooks.  Highly dynamic or inexpensive state is
36bf215546Sopenharmony_ci * streamed out on the fly, via pipe->set_*_state() hooks.
37bf215546Sopenharmony_ci *
38bf215546Sopenharmony_ci * OpenGL involves frequently mutating context state, which is mirrored in
39bf215546Sopenharmony_ci * core Mesa by highly mutable data structures.  However, most applications
40bf215546Sopenharmony_ci * typically draw the same things over and over - from frame to frame, most
41bf215546Sopenharmony_ci * of the same objects are still visible and need to be redrawn.  So, rather
42bf215546Sopenharmony_ci * than inventing new state all the time, applications usually mutate to swap
43bf215546Sopenharmony_ci * between known states that we've seen before.
44bf215546Sopenharmony_ci *
45bf215546Sopenharmony_ci * Gallium isolates us from this mutation by tracking API state, and
46bf215546Sopenharmony_ci * distilling it into a set of Constant State Objects, or CSOs.  Large,
47bf215546Sopenharmony_ci * complex, or typically reusable state can be created once, then reused
48bf215546Sopenharmony_ci * multiple times.  Drivers can create and store their own associated data.
49bf215546Sopenharmony_ci * This create/bind model corresponds to the pipe->create_*_state() and
50bf215546Sopenharmony_ci * pipe->bind_*_state() driver hooks.
51bf215546Sopenharmony_ci *
52bf215546Sopenharmony_ci * Some state is cheap to create, or expected to be highly dynamic.  Rather
53bf215546Sopenharmony_ci * than creating and caching piles of CSOs for these, Gallium simply streams
54bf215546Sopenharmony_ci * them out, via the pipe->set_*_state() driver hooks.
55bf215546Sopenharmony_ci *
56bf215546Sopenharmony_ci * To reduce draw time overhead, we try to compute as much state at create
57bf215546Sopenharmony_ci * time as possible.  Wherever possible, we translate the Gallium pipe state
58bf215546Sopenharmony_ci * to 3DSTATE commands, and store those commands in the CSO.  At draw time,
59bf215546Sopenharmony_ci * we can simply memcpy them into a batch buffer.
60bf215546Sopenharmony_ci *
61bf215546Sopenharmony_ci * No hardware matches the abstraction perfectly, so some commands require
62bf215546Sopenharmony_ci * information from multiple CSOs.  In this case, we can store two copies
63bf215546Sopenharmony_ci * of the packet (one in each CSO), and simply | together their DWords at
64bf215546Sopenharmony_ci * draw time.  Sometimes the second set is trivial (one or two fields), so
65bf215546Sopenharmony_ci * we simply pack it at draw time.
66bf215546Sopenharmony_ci *
67bf215546Sopenharmony_ci * There are two main components in the file below.  First, the CSO hooks
68bf215546Sopenharmony_ci * create/bind/track state.  The second are the draw-time upload functions,
69bf215546Sopenharmony_ci * crocus_upload_render_state() and crocus_upload_compute_state(), which read
70bf215546Sopenharmony_ci * the context state and emit the commands into the actual batch.
71bf215546Sopenharmony_ci */
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci#include <errno.h>
74bf215546Sopenharmony_ci#include <stdio.h>
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci#if HAVE_VALGRIND
77bf215546Sopenharmony_ci#include <memcheck.h>
78bf215546Sopenharmony_ci#include <valgrind.h>
79bf215546Sopenharmony_ci#define VG(x) x
80bf215546Sopenharmony_ci#ifdef DEBUG
81bf215546Sopenharmony_ci#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
82bf215546Sopenharmony_ci#endif
83bf215546Sopenharmony_ci#else
84bf215546Sopenharmony_ci#define VG(x)
85bf215546Sopenharmony_ci#endif
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci#include "drm-uapi/i915_drm.h"
88bf215546Sopenharmony_ci#include "intel/common/intel_l3_config.h"
89bf215546Sopenharmony_ci#include "intel/common/intel_sample_positions.h"
90bf215546Sopenharmony_ci#include "intel/compiler/brw_compiler.h"
91bf215546Sopenharmony_ci#include "compiler/shader_info.h"
92bf215546Sopenharmony_ci#include "pipe/p_context.h"
93bf215546Sopenharmony_ci#include "pipe/p_defines.h"
94bf215546Sopenharmony_ci#include "pipe/p_screen.h"
95bf215546Sopenharmony_ci#include "pipe/p_state.h"
96bf215546Sopenharmony_ci#include "util/format/u_format.h"
97bf215546Sopenharmony_ci#include "util/half_float.h"
98bf215546Sopenharmony_ci#include "util/u_dual_blend.h"
99bf215546Sopenharmony_ci#include "util/u_framebuffer.h"
100bf215546Sopenharmony_ci#include "util/u_helpers.h"
101bf215546Sopenharmony_ci#include "util/u_inlines.h"
102bf215546Sopenharmony_ci#include "util/u_memory.h"
103bf215546Sopenharmony_ci#include "util/u_prim.h"
104bf215546Sopenharmony_ci#include "util/u_transfer.h"
105bf215546Sopenharmony_ci#include "util/u_upload_mgr.h"
106bf215546Sopenharmony_ci#include "util/u_viewport.h"
107bf215546Sopenharmony_ci#include "crocus_batch.h"
108bf215546Sopenharmony_ci#include "crocus_context.h"
109bf215546Sopenharmony_ci#include "crocus_defines.h"
110bf215546Sopenharmony_ci#include "crocus_pipe.h"
111bf215546Sopenharmony_ci#include "crocus_resource.h"
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci#include "crocus_genx_macros.h"
114bf215546Sopenharmony_ci#include "intel/common/intel_guardband.h"
115bf215546Sopenharmony_ci#include "main/macros.h" /* UNCLAMPED_* */
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci/**
118bf215546Sopenharmony_ci * Statically assert that PIPE_* enums match the hardware packets.
119bf215546Sopenharmony_ci * (As long as they match, we don't need to translate them.)
120bf215546Sopenharmony_ci */
121bf215546Sopenharmony_ciUNUSED static void pipe_asserts()
122bf215546Sopenharmony_ci{
123bf215546Sopenharmony_ci#define PIPE_ASSERT(x) STATIC_ASSERT((int)x)
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_ci   /* pipe_logicop happens to match the hardware. */
126bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);
127bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);
128bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);
129bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);
130bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);
131bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);
132bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);
133bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);
134bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);
135bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);
136bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);
137bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);
138bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);
139bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);
140bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);
141bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   /* pipe_blend_func happens to match the hardware. */
144bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);
145bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);
146bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);
147bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);
148bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);
149bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);
150bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);
151bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);
152bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);
153bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);
154bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);
155bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);
156bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);
157bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);
158bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR);
159bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);
160bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);
161bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);
162bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci   /* pipe_blend_func happens to match the hardware. */
165bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);
166bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);
167bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);
168bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);
169bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   /* pipe_stencil_op happens to match the hardware. */
172bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);
173bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);
174bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);
175bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);
176bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);
177bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);
178bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);
179bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci#if GFX_VER >= 6
182bf215546Sopenharmony_ci   /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */
183bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT);
184bf215546Sopenharmony_ci   PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT);
185bf215546Sopenharmony_ci#endif
186bf215546Sopenharmony_ci#undef PIPE_ASSERT
187bf215546Sopenharmony_ci}
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_cistatic unsigned
190bf215546Sopenharmony_citranslate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)
191bf215546Sopenharmony_ci{
192bf215546Sopenharmony_ci   static const unsigned map[] = {
193bf215546Sopenharmony_ci      [PIPE_PRIM_POINTS]                   = _3DPRIM_POINTLIST,
194bf215546Sopenharmony_ci      [PIPE_PRIM_LINES]                    = _3DPRIM_LINELIST,
195bf215546Sopenharmony_ci      [PIPE_PRIM_LINE_LOOP]                = _3DPRIM_LINELOOP,
196bf215546Sopenharmony_ci      [PIPE_PRIM_LINE_STRIP]               = _3DPRIM_LINESTRIP,
197bf215546Sopenharmony_ci      [PIPE_PRIM_TRIANGLES]                = _3DPRIM_TRILIST,
198bf215546Sopenharmony_ci      [PIPE_PRIM_TRIANGLE_STRIP]           = _3DPRIM_TRISTRIP,
199bf215546Sopenharmony_ci      [PIPE_PRIM_TRIANGLE_FAN]             = _3DPRIM_TRIFAN,
200bf215546Sopenharmony_ci      [PIPE_PRIM_QUADS]                    = _3DPRIM_QUADLIST,
201bf215546Sopenharmony_ci      [PIPE_PRIM_QUAD_STRIP]               = _3DPRIM_QUADSTRIP,
202bf215546Sopenharmony_ci      [PIPE_PRIM_POLYGON]                  = _3DPRIM_POLYGON,
203bf215546Sopenharmony_ci#if GFX_VER >= 6
204bf215546Sopenharmony_ci      [PIPE_PRIM_LINES_ADJACENCY]          = _3DPRIM_LINELIST_ADJ,
205bf215546Sopenharmony_ci      [PIPE_PRIM_LINE_STRIP_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
206bf215546Sopenharmony_ci      [PIPE_PRIM_TRIANGLES_ADJACENCY]      = _3DPRIM_TRILIST_ADJ,
207bf215546Sopenharmony_ci      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
208bf215546Sopenharmony_ci#endif
209bf215546Sopenharmony_ci#if GFX_VER >= 7
210bf215546Sopenharmony_ci      [PIPE_PRIM_PATCHES]                  = _3DPRIM_PATCHLIST_1 - 1,
211bf215546Sopenharmony_ci#endif
212bf215546Sopenharmony_ci   };
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);
215bf215546Sopenharmony_ci}
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_cistatic unsigned
218bf215546Sopenharmony_citranslate_compare_func(enum pipe_compare_func pipe_func)
219bf215546Sopenharmony_ci{
220bf215546Sopenharmony_ci   static const unsigned map[] = {
221bf215546Sopenharmony_ci      [PIPE_FUNC_NEVER]    = COMPAREFUNCTION_NEVER,
222bf215546Sopenharmony_ci      [PIPE_FUNC_LESS]     = COMPAREFUNCTION_LESS,
223bf215546Sopenharmony_ci      [PIPE_FUNC_EQUAL]    = COMPAREFUNCTION_EQUAL,
224bf215546Sopenharmony_ci      [PIPE_FUNC_LEQUAL]   = COMPAREFUNCTION_LEQUAL,
225bf215546Sopenharmony_ci      [PIPE_FUNC_GREATER]  = COMPAREFUNCTION_GREATER,
226bf215546Sopenharmony_ci      [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,
227bf215546Sopenharmony_ci      [PIPE_FUNC_GEQUAL]   = COMPAREFUNCTION_GEQUAL,
228bf215546Sopenharmony_ci      [PIPE_FUNC_ALWAYS]   = COMPAREFUNCTION_ALWAYS,
229bf215546Sopenharmony_ci   };
230bf215546Sopenharmony_ci   return map[pipe_func];
231bf215546Sopenharmony_ci}
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_cistatic unsigned
234bf215546Sopenharmony_citranslate_shadow_func(enum pipe_compare_func pipe_func)
235bf215546Sopenharmony_ci{
236bf215546Sopenharmony_ci   /* Gallium specifies the result of shadow comparisons as:
237bf215546Sopenharmony_ci    *
238bf215546Sopenharmony_ci    *    1 if ref <op> texel,
239bf215546Sopenharmony_ci    *    0 otherwise.
240bf215546Sopenharmony_ci    *
241bf215546Sopenharmony_ci    * The hardware does:
242bf215546Sopenharmony_ci    *
243bf215546Sopenharmony_ci    *    0 if texel <op> ref,
244bf215546Sopenharmony_ci    *    1 otherwise.
245bf215546Sopenharmony_ci    *
246bf215546Sopenharmony_ci    * So we need to flip the operator and also negate.
247bf215546Sopenharmony_ci    */
248bf215546Sopenharmony_ci   static const unsigned map[] = {
249bf215546Sopenharmony_ci      [PIPE_FUNC_NEVER]    = PREFILTEROP_ALWAYS,
250bf215546Sopenharmony_ci      [PIPE_FUNC_LESS]     = PREFILTEROP_LEQUAL,
251bf215546Sopenharmony_ci      [PIPE_FUNC_EQUAL]    = PREFILTEROP_NOTEQUAL,
252bf215546Sopenharmony_ci      [PIPE_FUNC_LEQUAL]   = PREFILTEROP_LESS,
253bf215546Sopenharmony_ci      [PIPE_FUNC_GREATER]  = PREFILTEROP_GEQUAL,
254bf215546Sopenharmony_ci      [PIPE_FUNC_NOTEQUAL] = PREFILTEROP_EQUAL,
255bf215546Sopenharmony_ci      [PIPE_FUNC_GEQUAL]   = PREFILTEROP_GREATER,
256bf215546Sopenharmony_ci      [PIPE_FUNC_ALWAYS]   = PREFILTEROP_NEVER,
257bf215546Sopenharmony_ci   };
258bf215546Sopenharmony_ci   return map[pipe_func];
259bf215546Sopenharmony_ci}
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_cistatic unsigned
262bf215546Sopenharmony_citranslate_cull_mode(unsigned pipe_face)
263bf215546Sopenharmony_ci{
264bf215546Sopenharmony_ci   static const unsigned map[4] = {
265bf215546Sopenharmony_ci      [PIPE_FACE_NONE]           = CULLMODE_NONE,
266bf215546Sopenharmony_ci      [PIPE_FACE_FRONT]          = CULLMODE_FRONT,
267bf215546Sopenharmony_ci      [PIPE_FACE_BACK]           = CULLMODE_BACK,
268bf215546Sopenharmony_ci      [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH,
269bf215546Sopenharmony_ci   };
270bf215546Sopenharmony_ci   return map[pipe_face];
271bf215546Sopenharmony_ci}
272bf215546Sopenharmony_ci
#if GFX_VER >= 6
/**
 * Convert a Gallium PIPE_POLYGON_MODE_* value into the hardware's
 * FILL_MODE_* encoding.  FILL_RECTANGLE is mapped to the same solid fill
 * mode as plain FILL.
 */
static unsigned
translate_fill_mode(unsigned pipe_polymode)
{
   static const unsigned hw_fill[4] = {
      [PIPE_POLYGON_MODE_FILL]           = FILL_MODE_SOLID,
      [PIPE_POLYGON_MODE_LINE]           = FILL_MODE_WIREFRAME,
      [PIPE_POLYGON_MODE_POINT]          = FILL_MODE_POINT,
      [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID,
   };
   const unsigned hw_mode = hw_fill[pipe_polymode];

   return hw_mode;
}
#endif
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_cistatic unsigned
288bf215546Sopenharmony_citranslate_mip_filter(enum pipe_tex_mipfilter pipe_mip)
289bf215546Sopenharmony_ci{
290bf215546Sopenharmony_ci   static const unsigned map[] = {
291bf215546Sopenharmony_ci      [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,
292bf215546Sopenharmony_ci      [PIPE_TEX_MIPFILTER_LINEAR]  = MIPFILTER_LINEAR,
293bf215546Sopenharmony_ci      [PIPE_TEX_MIPFILTER_NONE]    = MIPFILTER_NONE,
294bf215546Sopenharmony_ci   };
295bf215546Sopenharmony_ci   return map[pipe_mip];
296bf215546Sopenharmony_ci}
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_cistatic uint32_t
299bf215546Sopenharmony_citranslate_wrap(unsigned pipe_wrap, bool either_nearest)
300bf215546Sopenharmony_ci{
301bf215546Sopenharmony_ci   static const unsigned map[] = {
302bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_REPEAT]                 = TCM_WRAP,
303bf215546Sopenharmony_ci#if GFX_VER == 8
304bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_CLAMP]                  = TCM_HALF_BORDER,
305bf215546Sopenharmony_ci#else
306bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_CLAMP]                  = TCM_CLAMP_BORDER,
307bf215546Sopenharmony_ci#endif
308bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_CLAMP_TO_EDGE]          = TCM_CLAMP,
309bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_CLAMP_TO_BORDER]        = TCM_CLAMP_BORDER,
310bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_MIRROR_REPEAT]          = TCM_MIRROR,
311bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE]   = TCM_MIRROR_ONCE,
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci      /* These are unsupported. */
314bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_MIRROR_CLAMP]           = -1,
315bf215546Sopenharmony_ci      [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1,
316bf215546Sopenharmony_ci   };
317bf215546Sopenharmony_ci#if GFX_VER < 8
318bf215546Sopenharmony_ci   if (pipe_wrap == PIPE_TEX_WRAP_CLAMP && either_nearest)
319bf215546Sopenharmony_ci      return TCM_CLAMP;
320bf215546Sopenharmony_ci#endif
321bf215546Sopenharmony_ci   return map[pipe_wrap];
322bf215546Sopenharmony_ci}
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci/**
325bf215546Sopenharmony_ci * Equiv if brw_state_batch
326bf215546Sopenharmony_ci */
327bf215546Sopenharmony_cistatic uint32_t *
328bf215546Sopenharmony_cistream_state(struct crocus_batch *batch,
329bf215546Sopenharmony_ci             unsigned size,
330bf215546Sopenharmony_ci             unsigned alignment,
331bf215546Sopenharmony_ci             uint32_t *out_offset)
332bf215546Sopenharmony_ci{
333bf215546Sopenharmony_ci   uint32_t offset = ALIGN(batch->state.used, alignment);
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   if (offset + size >= STATE_SZ && !batch->no_wrap) {
336bf215546Sopenharmony_ci      crocus_batch_flush(batch);
337bf215546Sopenharmony_ci      offset = ALIGN(batch->state.used, alignment);
338bf215546Sopenharmony_ci   } else if (offset + size >= batch->state.bo->size) {
339bf215546Sopenharmony_ci      const unsigned new_size =
340bf215546Sopenharmony_ci         MIN2(batch->state.bo->size + batch->state.bo->size / 2,
341bf215546Sopenharmony_ci              MAX_STATE_SIZE);
342bf215546Sopenharmony_ci      crocus_grow_buffer(batch, true, batch->state.used, new_size);
343bf215546Sopenharmony_ci      assert(offset + size < batch->state.bo->size);
344bf215546Sopenharmony_ci   }
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   crocus_record_state_size(batch->state_sizes, offset, size);
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci   batch->state.used = offset + size;
349bf215546Sopenharmony_ci   *out_offset = offset;
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci   return (uint32_t *)batch->state.map + (offset >> 2);
352bf215546Sopenharmony_ci}
353bf215546Sopenharmony_ci
/**
 * stream_state() + memcpy.
 *
 * Reserves \p size bytes at \p alignment in the batch's state buffer via
 * stream_state(), copies \p data into the reservation, and returns the
 * byte offset that stream_state() reported.
 *
 * \param batch      batch whose state buffer receives the data
 * \param data       source bytes to copy; must provide at least \p size bytes
 * \param size       number of bytes to reserve and copy
 * \param alignment  required alignment of the reservation
 */
static uint32_t
emit_state(struct crocus_batch *batch, const void *data, unsigned size,
           unsigned alignment)
{
   /* Declared as uint32_t so it matches both stream_state()'s out_offset
    * parameter and this function's return type.
    */
   uint32_t offset = 0;
   uint32_t *map = stream_state(batch, size, alignment, &offset);

   /* Skip the copy if no mapping came back. */
   if (map)
      memcpy(map, data, size);

   return offset;
}
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci#if GFX_VER <= 5
371bf215546Sopenharmony_cistatic void
372bf215546Sopenharmony_ciupload_pipelined_state_pointers(struct crocus_batch *batch,
373bf215546Sopenharmony_ci                                bool gs_active, uint32_t gs_offset,
374bf215546Sopenharmony_ci                                uint32_t vs_offset, uint32_t sf_offset,
375bf215546Sopenharmony_ci                                uint32_t clip_offset, uint32_t wm_offset, uint32_t cc_offset)
376bf215546Sopenharmony_ci{
377bf215546Sopenharmony_ci#if GFX_VER == 5
378bf215546Sopenharmony_ci   /* Need to flush before changing clip max threads for errata. */
379bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MI_FLUSH), foo);
380bf215546Sopenharmony_ci#endif
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
383bf215546Sopenharmony_ci      pp.PointertoVSState = ro_bo(batch->state.bo, vs_offset);
384bf215546Sopenharmony_ci      pp.GSEnable = gs_active;
385bf215546Sopenharmony_ci      if (gs_active)
386bf215546Sopenharmony_ci         pp.PointertoGSState = ro_bo(batch->state.bo, gs_offset);
387bf215546Sopenharmony_ci      pp.ClipEnable = true;
388bf215546Sopenharmony_ci      pp.PointertoCLIPState = ro_bo(batch->state.bo, clip_offset);
389bf215546Sopenharmony_ci      pp.PointertoSFState = ro_bo(batch->state.bo, sf_offset);
390bf215546Sopenharmony_ci      pp.PointertoWMState = ro_bo(batch->state.bo, wm_offset);
391bf215546Sopenharmony_ci      pp.PointertoColorCalcState = ro_bo(batch->state.bo, cc_offset);
392bf215546Sopenharmony_ci   }
393bf215546Sopenharmony_ci}
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci#endif
/**
 * Did field 'x' change between 'old_cso' and 'new_cso'?
 *
 * (If so, we may want to set some dirty flags.)
 *
 * Both macros expect pointers named 'old_cso' and 'new_cso' to be in scope
 * at the point of use.  A NULL 'old_cso' always counts as "changed".
 * cso_changed() compares the field with !=, while cso_changed_memcmp()
 * compares sizeof(old_cso->x) bytes, which suits array members.
 */
#define cso_changed(x) \
   (old_cso == NULL || (old_cso->x != new_cso->x))
#define cso_changed_memcmp(x) \
   (old_cso == NULL || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)))
404bf215546Sopenharmony_ci
/**
 * Emit the end-of-pipe sync that must precede STATE_BASE_ADDRESS.
 * Does nothing before Gfx6.
 */
static void
flush_before_state_base_change(struct crocus_batch *batch)
{
#if GFX_VER >= 6
   /* Flush before emitting STATE_BASE_ADDRESS.
    *
    * The PRM does not document this anywhere, but it appears to be required
    * before changing the surface state base address.  In Vulkan we have
    * seen GPU hangs with multi-level command buffers that clear depth,
    * reset state base address, and then go render stuff.
    *
    * In GL we would normally trust the kernel to perform sufficient stalls
    * and flushes before executing our batch.  The kernel's flushing does
    * not always seem to be sufficient, though, and we would rather not
    * depend on it.
    *
    * This is an end-of-pipe sync rather than a normal flush because the
    * current status of the GPU is unknown to us.  On Haswell at least, a
    * fast-clear operation in flight at the same time as a normal rendering
    * operation can cause hangs.  With the kernel's flushing being
    * insufficient, we have to make sure that rendering operations from
    * other processes have definitely completed before starting our own
    * rendering.  It's a bit of a big hammer but it appears to work.
    */
   unsigned flush_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                         PIPE_CONTROL_DEPTH_CACHE_FLUSH;

   /* The data cache flush is only requested on Gfx7 and later. */
   if (GFX_VER >= 7)
      flush_bits |= PIPE_CONTROL_DATA_CACHE_FLUSH;

   crocus_emit_end_of_pipe_sync(batch,
                                "change STATE_BASE_ADDRESS (flushes)",
                                flush_bits);
#endif
}
439bf215546Sopenharmony_ci
/**
 * Emit the cache invalidations that must follow STATE_BASE_ADDRESS.
 * Does nothing before Gfx6.
 */
static void
flush_after_state_base_change(struct crocus_batch *batch)
{
   /* Once the surface state base address has been re-set, some cache
    * flushing is needed so that the sampler engine picks up the new
    * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software. It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever with
    * regard to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX:  As far as we have been able to determine through
    * experimentation, flushing the texture cache appears to be sufficient.
    * The theory here is that all of the sampling/rendering units cache the
    * binding table in the texture cache.  However, we have yet to be able
    * to actually confirm this.
    */
#if GFX_VER >= 6
   const unsigned invalidate_bits = PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                    PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                    PIPE_CONTROL_STATE_CACHE_INVALIDATE;

   crocus_emit_end_of_pipe_sync(batch,
                                "change STATE_BASE_ADDRESS (invalidates)",
                                invalidate_bits);
#endif
}
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci#if GFX_VER >= 6
491bf215546Sopenharmony_cistatic void
492bf215546Sopenharmony_cicrocus_store_register_mem32(struct crocus_batch *batch, uint32_t reg,
493bf215546Sopenharmony_ci                            struct crocus_bo *bo, uint32_t offset,
494bf215546Sopenharmony_ci                            bool predicated)
495bf215546Sopenharmony_ci{
496bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
497bf215546Sopenharmony_ci      srm.RegisterAddress = reg;
498bf215546Sopenharmony_ci      srm.MemoryAddress = ggtt_bo(bo, offset);
499bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
500bf215546Sopenharmony_ci      srm.PredicateEnable = predicated;
501bf215546Sopenharmony_ci#else
502bf215546Sopenharmony_ci      if (predicated)
503bf215546Sopenharmony_ci         unreachable("unsupported predication");
504bf215546Sopenharmony_ci#endif
505bf215546Sopenharmony_ci   }
506bf215546Sopenharmony_ci}
507bf215546Sopenharmony_ci
/**
 * Store a 64-bit register to memory as two 32-bit stores: the dword at
 * reg goes to offset, then the dword at reg + 4 goes to offset + 4.
 */
static void
crocus_store_register_mem64(struct crocus_batch *batch, uint32_t reg,
                            struct crocus_bo *bo, uint32_t offset,
                            bool predicated)
{
   for (uint32_t half = 0; half < 2; half++) {
      const uint32_t delta = half * 4;

      crocus_store_register_mem32(batch, reg + delta, bo, offset + delta,
                                  predicated);
   }
}
516bf215546Sopenharmony_ci#endif
517bf215546Sopenharmony_ci
518bf215546Sopenharmony_ci#if GFX_VER >= 7
519bf215546Sopenharmony_cistatic void
520bf215546Sopenharmony_ci_crocus_emit_lri(struct crocus_batch *batch, uint32_t reg, uint32_t val)
521bf215546Sopenharmony_ci{
522bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
523bf215546Sopenharmony_ci      lri.RegisterOffset = reg;
524bf215546Sopenharmony_ci      lri.DataDWord      = val;
525bf215546Sopenharmony_ci   }
526bf215546Sopenharmony_ci}
527bf215546Sopenharmony_ci#define crocus_emit_lri(b, r, v) _crocus_emit_lri(b, GENX(r##_num), v)
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
530bf215546Sopenharmony_cistatic void
531bf215546Sopenharmony_ci_crocus_emit_lrr(struct crocus_batch *batch, uint32_t dst, uint32_t src)
532bf215546Sopenharmony_ci{
533bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_REG), lrr) {
534bf215546Sopenharmony_ci      lrr.SourceRegisterAddress = src;
535bf215546Sopenharmony_ci      lrr.DestinationRegisterAddress = dst;
536bf215546Sopenharmony_ci   }
537bf215546Sopenharmony_ci}
538bf215546Sopenharmony_ci
/**
 * Copy one 32-bit register (src) into another (dst) with a single
 * MI_LOAD_REGISTER_REG.
 */
static void
crocus_load_register_reg32(struct crocus_batch *batch, uint32_t dst,
                           uint32_t src)
{
   /* Thin wrapper around the raw emitter. */
   _crocus_emit_lrr(batch, dst, src);
}
545bf215546Sopenharmony_ci
/**
 * Copy a 64-bit register pair: the dword at src goes to dst, then the
 * dword at src + 4 goes to dst + 4.
 */
static void
crocus_load_register_reg64(struct crocus_batch *batch, uint32_t dst,
                           uint32_t src)
{
   for (uint32_t half = 0; half < 2; half++) {
      const uint32_t delta = half * 4;

      _crocus_emit_lrr(batch, dst + delta, src + delta);
   }
}
553bf215546Sopenharmony_ci#endif
554bf215546Sopenharmony_ci
/**
 * Load the 32-bit immediate val into the register at offset reg with a
 * single MI_LOAD_REGISTER_IMM.
 */
static void
crocus_load_register_imm32(struct crocus_batch *batch, uint32_t reg,
                           uint32_t val)
{
   /* Thin wrapper around the raw emitter. */
   _crocus_emit_lri(batch, reg, val);
}
561bf215546Sopenharmony_ci
/**
 * Load a 64-bit immediate into a register pair: the low 32 bits of val go
 * to reg, the high 32 bits to reg + 4.
 */
static void
crocus_load_register_imm64(struct crocus_batch *batch, uint32_t reg,
                           uint64_t val)
{
   const uint32_t low_dword = (uint32_t)(val & 0xffffffff);
   const uint32_t high_dword = (uint32_t)(val >> 32);

   _crocus_emit_lri(batch, reg, low_dword);
   _crocus_emit_lri(batch, reg + 4, high_dword);
}
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_ci/**
571bf215546Sopenharmony_ci * Emit MI_LOAD_REGISTER_MEM to load a 32-bit MMIO register from a buffer.
572bf215546Sopenharmony_ci */
573bf215546Sopenharmony_cistatic void
574bf215546Sopenharmony_cicrocus_load_register_mem32(struct crocus_batch *batch, uint32_t reg,
575bf215546Sopenharmony_ci                           struct crocus_bo *bo, uint32_t offset)
576bf215546Sopenharmony_ci{
577bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
578bf215546Sopenharmony_ci      lrm.RegisterAddress = reg;
579bf215546Sopenharmony_ci      lrm.MemoryAddress = ro_bo(bo, offset);
580bf215546Sopenharmony_ci   }
581bf215546Sopenharmony_ci}
582bf215546Sopenharmony_ci
/**
 * Load a 64-bit value from a buffer into an MMIO register pair using two
 * MI_LOAD_REGISTER_MEM commands: offset -> reg, then offset + 4 -> reg + 4.
 */
static void
crocus_load_register_mem64(struct crocus_batch *batch, uint32_t reg,
                           struct crocus_bo *bo, uint32_t offset)
{
   for (uint32_t half = 0; half < 2; half++) {
      const uint32_t delta = half * 4;

      crocus_load_register_mem32(batch, reg + delta, bo, offset + delta);
   }
}
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
596bf215546Sopenharmony_cistatic void
597bf215546Sopenharmony_cicrocus_store_data_imm32(struct crocus_batch *batch,
598bf215546Sopenharmony_ci                        struct crocus_bo *bo, uint32_t offset,
599bf215546Sopenharmony_ci                        uint32_t imm)
600bf215546Sopenharmony_ci{
601bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) {
602bf215546Sopenharmony_ci      sdi.Address = rw_bo(bo, offset);
603bf215546Sopenharmony_ci#if GFX_VER >= 6
604bf215546Sopenharmony_ci      sdi.ImmediateData = imm;
605bf215546Sopenharmony_ci#endif
606bf215546Sopenharmony_ci   }
607bf215546Sopenharmony_ci}
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_cistatic void
610bf215546Sopenharmony_cicrocus_store_data_imm64(struct crocus_batch *batch,
611bf215546Sopenharmony_ci                        struct crocus_bo *bo, uint32_t offset,
612bf215546Sopenharmony_ci                        uint64_t imm)
613bf215546Sopenharmony_ci{
614bf215546Sopenharmony_ci   /* Can't use crocus_emit_cmd because MI_STORE_DATA_IMM has a length of
615bf215546Sopenharmony_ci    * 2 in genxml but it's actually variable length and we need 5 DWords.
616bf215546Sopenharmony_ci    */
617bf215546Sopenharmony_ci   void *map = crocus_get_command_space(batch, 4 * 5);
618bf215546Sopenharmony_ci   _crocus_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) {
619bf215546Sopenharmony_ci      sdi.DWordLength = 5 - 2;
620bf215546Sopenharmony_ci      sdi.Address = rw_bo(bo, offset);
621bf215546Sopenharmony_ci#if GFX_VER >= 6
622bf215546Sopenharmony_ci      sdi.ImmediateData = imm;
623bf215546Sopenharmony_ci#endif
624bf215546Sopenharmony_ci   }
625bf215546Sopenharmony_ci}
626bf215546Sopenharmony_ci#endif
627bf215546Sopenharmony_ci
/**
 * Copy bytes from src_bo to dst_bo on the GPU, one dword at a time.
 *
 * Each dword is loaded into the scratch register CROCUS_TEMP_REG and then
 * stored (unpredicated) to the destination, so that register's previous
 * contents are overwritten.  The byte count and both offsets must be
 * multiples of 4.
 */
static void
crocus_copy_mem_mem(struct crocus_batch *batch,
                    struct crocus_bo *dst_bo, uint32_t dst_offset,
                    struct crocus_bo *src_bo, uint32_t src_offset,
                    unsigned bytes)
{
   assert(bytes % 4 == 0);
   assert(dst_offset % 4 == 0);
   assert(src_offset % 4 == 0);

#define CROCUS_TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
   unsigned copied = 0;

   while (copied < bytes) {
      crocus_load_register_mem32(batch, CROCUS_TEMP_REG,
                                 src_bo, src_offset + copied);
      crocus_store_register_mem32(batch, CROCUS_TEMP_REG,
                                  dst_bo, dst_offset + copied, false);
      copied += 4;
   }
}
646bf215546Sopenharmony_ci#endif
647bf215546Sopenharmony_ci
648bf215546Sopenharmony_ci/**
649bf215546Sopenharmony_ci * Gallium CSO for rasterizer state.
650bf215546Sopenharmony_ci */
651bf215546Sopenharmony_cistruct crocus_rasterizer_state {
652bf215546Sopenharmony_ci   struct pipe_rasterizer_state cso;
653bf215546Sopenharmony_ci#if GFX_VER >= 6
654bf215546Sopenharmony_ci   uint32_t sf[GENX(3DSTATE_SF_length)];
655bf215546Sopenharmony_ci   uint32_t clip[GENX(3DSTATE_CLIP_length)];
656bf215546Sopenharmony_ci#endif
657bf215546Sopenharmony_ci#if GFX_VER >= 8
658bf215546Sopenharmony_ci   uint32_t raster[GENX(3DSTATE_RASTER_length)];
659bf215546Sopenharmony_ci#endif
660bf215546Sopenharmony_ci   uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci   uint8_t num_clip_plane_consts;
663bf215546Sopenharmony_ci   bool fill_mode_point_or_line;
664bf215546Sopenharmony_ci};
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci#if GFX_VER <= 5
/* Indices of the fixed-function units that own a section of the URB. */
#define URB_VS 0
#define URB_GS 1
#define URB_CLP 2
#define URB_SF 3
#define URB_CS 4

/* Per-unit URB limits, indexed by the URB_* values above: the minimum and
 * preferred number of entries, and the minimum and maximum entry size.
 */
static const struct {
   uint32_t min_nr_entries;
   uint32_t preferred_nr_entries;
   uint32_t min_entry_size;
   uint32_t  max_entry_size;
} limits[URB_CS+1] = {
   [URB_VS] = {
      .min_nr_entries = 16, .preferred_nr_entries = 32,
      .min_entry_size = 1, .max_entry_size = 5,
   },
   [URB_GS] = {
      .min_nr_entries = 4, .preferred_nr_entries = 8,
      .min_entry_size = 1, .max_entry_size = 5,
   },
   [URB_CLP] = {
      .min_nr_entries = 5, .preferred_nr_entries = 10,
      .min_entry_size = 1, .max_entry_size = 5,
   },
   [URB_SF] = {
      .min_nr_entries = 1, .preferred_nr_entries = 8,
      .min_entry_size = 1, .max_entry_size = 12,
   },
   [URB_CS] = {
      .min_nr_entries = 1, .preferred_nr_entries = 4,
      .min_entry_size = 1, .max_entry_size = 32,
   },
};
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_cistatic bool check_urb_layout(struct crocus_context *ice)
687bf215546Sopenharmony_ci{
688bf215546Sopenharmony_ci   ice->urb.vs_start = 0;
689bf215546Sopenharmony_ci   ice->urb.gs_start = ice->urb.nr_vs_entries * ice->urb.vsize;
690bf215546Sopenharmony_ci   ice->urb.clip_start = ice->urb.gs_start + ice->urb.nr_gs_entries * ice->urb.vsize;
691bf215546Sopenharmony_ci   ice->urb.sf_start = ice->urb.clip_start + ice->urb.nr_clip_entries * ice->urb.vsize;
692bf215546Sopenharmony_ci   ice->urb.cs_start = ice->urb.sf_start + ice->urb.nr_sf_entries * ice->urb.sfsize;
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci   return ice->urb.cs_start + ice->urb.nr_cs_entries *
695bf215546Sopenharmony_ci      ice->urb.csize <= ice->urb.size;
696bf215546Sopenharmony_ci}
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_cistatic bool
700bf215546Sopenharmony_cicrocus_calculate_urb_fence(struct crocus_batch *batch, unsigned csize,
701bf215546Sopenharmony_ci                           unsigned vsize, unsigned sfsize)
702bf215546Sopenharmony_ci{
703bf215546Sopenharmony_ci   struct crocus_context *ice = batch->ice;
704bf215546Sopenharmony_ci   if (csize < limits[URB_CS].min_entry_size)
705bf215546Sopenharmony_ci      csize = limits[URB_CS].min_entry_size;
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci   if (vsize < limits[URB_VS].min_entry_size)
708bf215546Sopenharmony_ci      vsize = limits[URB_VS].min_entry_size;
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci   if (sfsize < limits[URB_SF].min_entry_size)
711bf215546Sopenharmony_ci      sfsize = limits[URB_SF].min_entry_size;
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci   if (ice->urb.vsize < vsize ||
714bf215546Sopenharmony_ci       ice->urb.sfsize < sfsize ||
715bf215546Sopenharmony_ci       ice->urb.csize < csize ||
716bf215546Sopenharmony_ci       (ice->urb.constrained && (ice->urb.vsize > vsize ||
717bf215546Sopenharmony_ci                                 ice->urb.sfsize > sfsize ||
718bf215546Sopenharmony_ci                                 ice->urb.csize > csize))) {
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci      ice->urb.csize = csize;
722bf215546Sopenharmony_ci      ice->urb.sfsize = sfsize;
723bf215546Sopenharmony_ci      ice->urb.vsize = vsize;
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci      ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;
726bf215546Sopenharmony_ci      ice->urb.nr_gs_entries = limits[URB_GS].preferred_nr_entries;
727bf215546Sopenharmony_ci      ice->urb.nr_clip_entries = limits[URB_CLP].preferred_nr_entries;
728bf215546Sopenharmony_ci      ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries;
729bf215546Sopenharmony_ci      ice->urb.nr_cs_entries = limits[URB_CS].preferred_nr_entries;
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci      ice->urb.constrained = 0;
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci      if (GFX_VER == 5) {
734bf215546Sopenharmony_ci         ice->urb.nr_vs_entries = 128;
735bf215546Sopenharmony_ci         ice->urb.nr_sf_entries = 48;
736bf215546Sopenharmony_ci         if (check_urb_layout(ice)) {
737bf215546Sopenharmony_ci            goto done;
738bf215546Sopenharmony_ci         } else {
739bf215546Sopenharmony_ci            ice->urb.constrained = 1;
740bf215546Sopenharmony_ci            ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;
741bf215546Sopenharmony_ci            ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries;
742bf215546Sopenharmony_ci         }
743bf215546Sopenharmony_ci      } else if (GFX_VERx10 == 45) {
744bf215546Sopenharmony_ci         ice->urb.nr_vs_entries = 64;
745bf215546Sopenharmony_ci         if (check_urb_layout(ice)) {
746bf215546Sopenharmony_ci            goto done;
747bf215546Sopenharmony_ci         } else {
748bf215546Sopenharmony_ci            ice->urb.constrained = 1;
749bf215546Sopenharmony_ci            ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;
750bf215546Sopenharmony_ci         }
751bf215546Sopenharmony_ci      }
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci      if (!check_urb_layout(ice)) {
754bf215546Sopenharmony_ci         ice->urb.nr_vs_entries = limits[URB_VS].min_nr_entries;
755bf215546Sopenharmony_ci         ice->urb.nr_gs_entries = limits[URB_GS].min_nr_entries;
756bf215546Sopenharmony_ci         ice->urb.nr_clip_entries = limits[URB_CLP].min_nr_entries;
757bf215546Sopenharmony_ci         ice->urb.nr_sf_entries = limits[URB_SF].min_nr_entries;
758bf215546Sopenharmony_ci         ice->urb.nr_cs_entries = limits[URB_CS].min_nr_entries;
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci         /* Mark us as operating with constrained nr_entries, so that next
761bf215546Sopenharmony_ci          * time we recalculate we'll resize the fences in the hope of
762bf215546Sopenharmony_ci          * escaping constrained mode and getting back to normal performance.
763bf215546Sopenharmony_ci          */
764bf215546Sopenharmony_ci         ice->urb.constrained = 1;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci         if (!check_urb_layout(ice)) {
767bf215546Sopenharmony_ci            /* This is impossible, given the maximal sizes of urb
768bf215546Sopenharmony_ci             * entries and the values for minimum nr of entries
769bf215546Sopenharmony_ci             * provided above.
770bf215546Sopenharmony_ci             */
771bf215546Sopenharmony_ci            fprintf(stderr, "couldn't calculate URB layout!\n");
772bf215546Sopenharmony_ci            exit(1);
773bf215546Sopenharmony_ci         }
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci         if (INTEL_DEBUG(DEBUG_URB|DEBUG_PERF))
776bf215546Sopenharmony_ci            fprintf(stderr, "URB CONSTRAINED\n");
777bf215546Sopenharmony_ci      }
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_cidone:
780bf215546Sopenharmony_ci      if (INTEL_DEBUG(DEBUG_URB))
781bf215546Sopenharmony_ci         fprintf(stderr,
782bf215546Sopenharmony_ci                 "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
783bf215546Sopenharmony_ci                 ice->urb.vs_start,
784bf215546Sopenharmony_ci                 ice->urb.gs_start,
785bf215546Sopenharmony_ci                 ice->urb.clip_start,
786bf215546Sopenharmony_ci                 ice->urb.sf_start,
787bf215546Sopenharmony_ci                 ice->urb.cs_start,
788bf215546Sopenharmony_ci                 ice->urb.size);
789bf215546Sopenharmony_ci      return true;
790bf215546Sopenharmony_ci   }
791bf215546Sopenharmony_ci   return false;
792bf215546Sopenharmony_ci}
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_cistatic void
795bf215546Sopenharmony_cicrocus_upload_urb_fence(struct crocus_batch *batch)
796bf215546Sopenharmony_ci{
797bf215546Sopenharmony_ci   uint32_t urb_fence[3];
798bf215546Sopenharmony_ci   _crocus_pack_command(batch, GENX(URB_FENCE), urb_fence, urb) {
799bf215546Sopenharmony_ci      urb.VSUnitURBReallocationRequest = 1;
800bf215546Sopenharmony_ci      urb.GSUnitURBReallocationRequest = 1;
801bf215546Sopenharmony_ci      urb.CLIPUnitURBReallocationRequest = 1;
802bf215546Sopenharmony_ci      urb.SFUnitURBReallocationRequest = 1;
803bf215546Sopenharmony_ci      urb.VFEUnitURBReallocationRequest = 1;
804bf215546Sopenharmony_ci      urb.CSUnitURBReallocationRequest = 1;
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci      urb.VSFence = batch->ice->urb.gs_start;
807bf215546Sopenharmony_ci      urb.GSFence = batch->ice->urb.clip_start;
808bf215546Sopenharmony_ci      urb.CLIPFence = batch->ice->urb.sf_start;
809bf215546Sopenharmony_ci      urb.SFFence = batch->ice->urb.cs_start;
810bf215546Sopenharmony_ci      urb.CSFence = batch->ice->urb.size;
811bf215546Sopenharmony_ci   }
812bf215546Sopenharmony_ci
813bf215546Sopenharmony_ci   /* erratum: URB_FENCE must not cross a 64byte cacheline */
814bf215546Sopenharmony_ci   if ((crocus_batch_bytes_used(batch) & 15) > 12) {
815bf215546Sopenharmony_ci      int pad = 16 - (crocus_batch_bytes_used(batch) & 15);
816bf215546Sopenharmony_ci      do {
817bf215546Sopenharmony_ci         *(uint32_t *)batch->command.map_next = 0;
818bf215546Sopenharmony_ci         batch->command.map_next += sizeof(uint32_t);
819bf215546Sopenharmony_ci      } while (--pad);
820bf215546Sopenharmony_ci   }
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci   crocus_batch_emit(batch, urb_fence, sizeof(uint32_t) * 3);
823bf215546Sopenharmony_ci}
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_cistatic bool
826bf215546Sopenharmony_cicalculate_curbe_offsets(struct crocus_batch *batch)
827bf215546Sopenharmony_ci{
828bf215546Sopenharmony_ci   struct crocus_context *ice = batch->ice;
829bf215546Sopenharmony_ci
830bf215546Sopenharmony_ci   unsigned nr_fp_regs, nr_vp_regs, nr_clip_regs = 0;
831bf215546Sopenharmony_ci   unsigned total_regs;
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   nr_fp_regs = 0;
834bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++) {
835bf215546Sopenharmony_ci      const struct brw_ubo_range *range = &ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data->ubo_ranges[i];
836bf215546Sopenharmony_ci      if (range->length == 0)
837bf215546Sopenharmony_ci         continue;
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_ci      /* ubo range tracks at 256-bit, we need 512-bit */
840bf215546Sopenharmony_ci      nr_fp_regs += (range->length + 1) / 2;
841bf215546Sopenharmony_ci   }
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci   if (ice->state.cso_rast->cso.clip_plane_enable) {
844bf215546Sopenharmony_ci      unsigned nr_planes = 6 + util_bitcount(ice->state.cso_rast->cso.clip_plane_enable);
845bf215546Sopenharmony_ci      nr_clip_regs = (nr_planes * 4 + 15) / 16;
846bf215546Sopenharmony_ci   }
847bf215546Sopenharmony_ci
848bf215546Sopenharmony_ci   nr_vp_regs = 0;
849bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++) {
850bf215546Sopenharmony_ci      const struct brw_ubo_range *range = &ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data->ubo_ranges[i];
851bf215546Sopenharmony_ci      if (range->length == 0)
852bf215546Sopenharmony_ci         continue;
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci      /* ubo range tracks at 256-bit, we need 512-bit */
855bf215546Sopenharmony_ci      nr_vp_regs += (range->length + 1) / 2;
856bf215546Sopenharmony_ci   }
857bf215546Sopenharmony_ci   if (nr_vp_regs == 0) {
858bf215546Sopenharmony_ci      /* The pre-gen6 VS requires that some push constants get loaded no
859bf215546Sopenharmony_ci       * matter what, or the GPU would hang.
860bf215546Sopenharmony_ci       */
861bf215546Sopenharmony_ci      nr_vp_regs = 1;
862bf215546Sopenharmony_ci   }
863bf215546Sopenharmony_ci   total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci   /* The CURBE allocation size is limited to 32 512-bit units (128 EU
866bf215546Sopenharmony_ci    * registers, or 1024 floats).  See CS_URB_STATE in the gen4 or gen5
867bf215546Sopenharmony_ci    * (volume 1, part 1) PRMs.
868bf215546Sopenharmony_ci    *
869bf215546Sopenharmony_ci    * Note that in brw_fs.cpp we're only loading up to 16 EU registers of
870bf215546Sopenharmony_ci    * values as push constants before spilling to pull constants, and in
871bf215546Sopenharmony_ci    * brw_vec4.cpp we're loading up to 32 registers of push constants.  An EU
872bf215546Sopenharmony_ci    * register is 1/2 of one of these URB entry units, so that leaves us 16 EU
873bf215546Sopenharmony_ci    * regs for clip.
874bf215546Sopenharmony_ci    */
875bf215546Sopenharmony_ci   assert(total_regs <= 32);
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci   /* Lazy resize:
878bf215546Sopenharmony_ci    */
879bf215546Sopenharmony_ci   if (nr_fp_regs > ice->curbe.wm_size ||
880bf215546Sopenharmony_ci       nr_vp_regs > ice->curbe.vs_size ||
881bf215546Sopenharmony_ci       nr_clip_regs != ice->curbe.clip_size ||
882bf215546Sopenharmony_ci       (total_regs < ice->curbe.total_size / 4 &&
883bf215546Sopenharmony_ci        ice->curbe.total_size > 16)) {
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci      GLuint reg = 0;
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci      /* Calculate a new layout:
888bf215546Sopenharmony_ci       */
889bf215546Sopenharmony_ci      reg = 0;
890bf215546Sopenharmony_ci      ice->curbe.wm_start = reg;
891bf215546Sopenharmony_ci      ice->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
892bf215546Sopenharmony_ci      ice->curbe.clip_start = reg;
893bf215546Sopenharmony_ci      ice->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
894bf215546Sopenharmony_ci      ice->curbe.vs_start = reg;
895bf215546Sopenharmony_ci      ice->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
896bf215546Sopenharmony_ci      ice->curbe.total_size = reg;
897bf215546Sopenharmony_ci
898bf215546Sopenharmony_ci      if (0)
899bf215546Sopenharmony_ci         fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
900bf215546Sopenharmony_ci                 ice->curbe.wm_start,
901bf215546Sopenharmony_ci                 ice->curbe.wm_size,
902bf215546Sopenharmony_ci                 ice->curbe.clip_start,
903bf215546Sopenharmony_ci                 ice->curbe.clip_size,
904bf215546Sopenharmony_ci                 ice->curbe.vs_start,
905bf215546Sopenharmony_ci                 ice->curbe.vs_size );
906bf215546Sopenharmony_ci      return true;
907bf215546Sopenharmony_ci   }
908bf215546Sopenharmony_ci   return false;
909bf215546Sopenharmony_ci}
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_cistatic void
912bf215546Sopenharmony_ciupload_shader_consts(struct crocus_context *ice,
913bf215546Sopenharmony_ci                     gl_shader_stage stage,
914bf215546Sopenharmony_ci                     uint32_t *map,
915bf215546Sopenharmony_ci                     unsigned start)
916bf215546Sopenharmony_ci{
917bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
918bf215546Sopenharmony_ci   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
919bf215546Sopenharmony_ci   uint32_t *cmap;
920bf215546Sopenharmony_ci   bool found = false;
921bf215546Sopenharmony_ci   unsigned offset = start * 16;
922bf215546Sopenharmony_ci   int total = 0;
923bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++) {
924bf215546Sopenharmony_ci      const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci      if (range->length == 0)
927bf215546Sopenharmony_ci         continue;
928bf215546Sopenharmony_ci
929bf215546Sopenharmony_ci      unsigned block_index = crocus_bti_to_group_index(
930bf215546Sopenharmony_ci         &shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block);
931bf215546Sopenharmony_ci      unsigned len = range->length * 8 * sizeof(float);
932bf215546Sopenharmony_ci      unsigned start = range->start * 8 * sizeof(float);
933bf215546Sopenharmony_ci      struct pipe_transfer *transfer;
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_ci      cmap = pipe_buffer_map_range(&ice->ctx, ice->state.shaders[stage].constbufs[block_index].buffer,
936bf215546Sopenharmony_ci                                   ice->state.shaders[stage].constbufs[block_index].buffer_offset + start, len,
937bf215546Sopenharmony_ci                                   PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED, &transfer);
938bf215546Sopenharmony_ci      if (cmap)
939bf215546Sopenharmony_ci         memcpy(&map[offset + (total * 8)], cmap, len);
940bf215546Sopenharmony_ci      pipe_buffer_unmap(&ice->ctx, transfer);
941bf215546Sopenharmony_ci      total += range->length;
942bf215546Sopenharmony_ci      found = true;
943bf215546Sopenharmony_ci   }
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci   if (stage == MESA_SHADER_VERTEX && !found) {
946bf215546Sopenharmony_ci      /* The pre-gen6 VS requires that some push constants get loaded no
947bf215546Sopenharmony_ci       * matter what, or the GPU would hang.
948bf215546Sopenharmony_ci       */
949bf215546Sopenharmony_ci      unsigned len = 16;
950bf215546Sopenharmony_ci      memset(&map[offset], 0, len);
951bf215546Sopenharmony_ci   }
952bf215546Sopenharmony_ci}
953bf215546Sopenharmony_ci
/* The six fixed clip planes, four coefficients each.  They are written to
 * the clipper's CURBE section ahead of any user clip planes.
 */
static const float fixed_plane[6][4] = {
   {  0.0f,  0.0f, -1.0f, 1.0f },
   {  0.0f,  0.0f,  1.0f, 1.0f },
   {  0.0f, -1.0f,  0.0f, 1.0f },
   {  0.0f,  1.0f,  0.0f, 1.0f },
   { -1.0f,  0.0f,  0.0f, 1.0f },
   {  1.0f,  0.0f,  0.0f, 1.0f },
};
962bf215546Sopenharmony_ci
963bf215546Sopenharmony_cistatic void
964bf215546Sopenharmony_cigen4_upload_curbe(struct crocus_batch *batch)
965bf215546Sopenharmony_ci{
966bf215546Sopenharmony_ci   struct crocus_context *ice = batch->ice;
967bf215546Sopenharmony_ci   const unsigned sz = ice->curbe.total_size;
968bf215546Sopenharmony_ci   const unsigned buf_sz = sz * 16 * sizeof(float);
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci   if (sz == 0)
971bf215546Sopenharmony_ci      goto emit;
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci   uint32_t *map;
974bf215546Sopenharmony_ci   u_upload_alloc(ice->ctx.const_uploader, 0, buf_sz, 64,
975bf215546Sopenharmony_ci                  &ice->curbe.curbe_offset, (struct pipe_resource **)&ice->curbe.curbe_res, (void **) &map);
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_ci   /* fragment shader constants */
978bf215546Sopenharmony_ci   if (ice->curbe.wm_size) {
979bf215546Sopenharmony_ci      upload_shader_consts(ice, MESA_SHADER_FRAGMENT, map, ice->curbe.wm_start);
980bf215546Sopenharmony_ci   }
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_ci   /* clipper constants */
983bf215546Sopenharmony_ci   if (ice->curbe.clip_size) {
984bf215546Sopenharmony_ci      unsigned offset = ice->curbe.clip_start * 16;
985bf215546Sopenharmony_ci      float *fmap = (float *)map;
986bf215546Sopenharmony_ci      unsigned i;
987bf215546Sopenharmony_ci      /* If any planes are going this way, send them all this way:
988bf215546Sopenharmony_ci       */
989bf215546Sopenharmony_ci      for (i = 0; i < 6; i++) {
990bf215546Sopenharmony_ci         fmap[offset + i * 4 + 0] = fixed_plane[i][0];
991bf215546Sopenharmony_ci         fmap[offset + i * 4 + 1] = fixed_plane[i][1];
992bf215546Sopenharmony_ci         fmap[offset + i * 4 + 2] = fixed_plane[i][2];
993bf215546Sopenharmony_ci         fmap[offset + i * 4 + 3] = fixed_plane[i][3];
994bf215546Sopenharmony_ci      }
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci      unsigned mask = ice->state.cso_rast->cso.clip_plane_enable;
997bf215546Sopenharmony_ci      struct pipe_clip_state *cp = &ice->state.clip_planes;
998bf215546Sopenharmony_ci      while (mask) {
999bf215546Sopenharmony_ci         const int j = u_bit_scan(&mask);
1000bf215546Sopenharmony_ci         fmap[offset + i * 4 + 0] = cp->ucp[j][0];
1001bf215546Sopenharmony_ci         fmap[offset + i * 4 + 1] = cp->ucp[j][1];
1002bf215546Sopenharmony_ci         fmap[offset + i * 4 + 2] = cp->ucp[j][2];
1003bf215546Sopenharmony_ci         fmap[offset + i * 4 + 3] = cp->ucp[j][3];
1004bf215546Sopenharmony_ci         i++;
1005bf215546Sopenharmony_ci      }
1006bf215546Sopenharmony_ci   }
1007bf215546Sopenharmony_ci
1008bf215546Sopenharmony_ci   /* vertex shader constants */
1009bf215546Sopenharmony_ci   if (ice->curbe.vs_size) {
1010bf215546Sopenharmony_ci      upload_shader_consts(ice, MESA_SHADER_VERTEX, map, ice->curbe.vs_start);
1011bf215546Sopenharmony_ci   }
1012bf215546Sopenharmony_ci   if (0) {
1013bf215546Sopenharmony_ci      for (int i = 0; i < sz*16; i+=4) {
1014bf215546Sopenharmony_ci         float *f = (float *)map;
1015bf215546Sopenharmony_ci         fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
1016bf215546Sopenharmony_ci                 f[i+0], f[i+1], f[i+2], f[i+3]);
1017bf215546Sopenharmony_ci      }
1018bf215546Sopenharmony_ci   }
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_ciemit:
1021bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(CONSTANT_BUFFER), cb) {
1022bf215546Sopenharmony_ci      if (ice->curbe.curbe_res) {
1023bf215546Sopenharmony_ci         cb.BufferLength = ice->curbe.total_size - 1;
1024bf215546Sopenharmony_ci         cb.Valid = 1;
1025bf215546Sopenharmony_ci         cb.BufferStartingAddress = ro_bo(ice->curbe.curbe_res->bo, ice->curbe.curbe_offset);
1026bf215546Sopenharmony_ci      }
1027bf215546Sopenharmony_ci   }
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci#if GFX_VER == 4 && GFX_VERx10 != 45
1030bf215546Sopenharmony_ci   /* Work around a Broadwater/Crestline depth interpolator bug.  The
1031bf215546Sopenharmony_ci    * following sequence will cause GPU hangs:
1032bf215546Sopenharmony_ci    *
1033bf215546Sopenharmony_ci    * 1. Change state so that all depth related fields in CC_STATE are
1034bf215546Sopenharmony_ci    *    disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
1035bf215546Sopenharmony_ci    * 2. Emit a CONSTANT_BUFFER packet.
1036bf215546Sopenharmony_ci    * 3. Draw via 3DPRIMITIVE.
1037bf215546Sopenharmony_ci    *
1038bf215546Sopenharmony_ci    * The recommended workaround is to emit a non-pipelined state change after
1039bf215546Sopenharmony_ci    * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
1040bf215546Sopenharmony_ci    *
1041bf215546Sopenharmony_ci    * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
1042bf215546Sopenharmony_ci    * and always emit it when "PS Use Source Depth" is set.  We could be more
1043bf215546Sopenharmony_ci    * precise, but the additional complexity is probably not worth it.
1044bf215546Sopenharmony_ci    *
1045bf215546Sopenharmony_ci    */
1046bf215546Sopenharmony_ci   const struct shader_info *fs_info =
1047bf215546Sopenharmony_ci      crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_ci   if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
1050bf215546Sopenharmony_ci      ice->state.global_depth_offset_clamp = 0;
1051bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp);
1052bf215546Sopenharmony_ci   }
1053bf215546Sopenharmony_ci#endif
1054bf215546Sopenharmony_ci}
1055bf215546Sopenharmony_ci#endif
1056bf215546Sopenharmony_ci
1057bf215546Sopenharmony_ci#if GFX_VER >= 7
1058bf215546Sopenharmony_ci
/* Platform-prefixed (IVB / VLV / HSW) default values for L3SQCREG1.
 *
 * NOTE(review): setup_l3_config() below programs the register through
 * SQGPCI_DEFAULT / BYT_SQGPCI_DEFAULT / SQHPCI_DEFAULT and does not reference
 * these three names -- confirm whether they are still needed.
 */
#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000
#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000
#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_cistatic void
1064bf215546Sopenharmony_cisetup_l3_config(struct crocus_batch *batch, const struct intel_l3_config *cfg)
1065bf215546Sopenharmony_ci{
1066bf215546Sopenharmony_ci#if GFX_VER == 7
1067bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &batch->screen->devinfo;
1068bf215546Sopenharmony_ci   const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL];
1069bf215546Sopenharmony_ci   const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] ||
1070bf215546Sopenharmony_ci                       cfg->n[INTEL_L3P_ALL];
1071bf215546Sopenharmony_ci   const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] ||
1072bf215546Sopenharmony_ci                      cfg->n[INTEL_L3P_ALL];
1073bf215546Sopenharmony_ci   const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] ||
1074bf215546Sopenharmony_ci                      cfg->n[INTEL_L3P_ALL];
1075bf215546Sopenharmony_ci   const bool has_slm = cfg->n[INTEL_L3P_SLM];
1076bf215546Sopenharmony_ci#endif
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_ci   /* According to the hardware docs, the L3 partitioning can only be changed
1079bf215546Sopenharmony_ci    * while the pipeline is completely drained and the caches are flushed,
1080bf215546Sopenharmony_ci    * which involves a first PIPE_CONTROL flush which stalls the pipeline...
1081bf215546Sopenharmony_ci    */
1082bf215546Sopenharmony_ci   crocus_emit_pipe_control_flush(batch, "l3_config",
1083bf215546Sopenharmony_ci                                  PIPE_CONTROL_DATA_CACHE_FLUSH |
1084bf215546Sopenharmony_ci                                  PIPE_CONTROL_CS_STALL);
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci   /* ...followed by a second pipelined PIPE_CONTROL that initiates
1087bf215546Sopenharmony_ci    * invalidation of the relevant caches.  Note that because RO invalidation
1088bf215546Sopenharmony_ci    * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
1089bf215546Sopenharmony_ci    * command is processed by the CS) we cannot combine it with the previous
1090bf215546Sopenharmony_ci    * stalling flush as the hardware documentation suggests, because that
1091bf215546Sopenharmony_ci    * would cause the CS to stall on previous rendering *after* RO
1092bf215546Sopenharmony_ci    * invalidation and wouldn't prevent the RO caches from being polluted by
1093bf215546Sopenharmony_ci    * concurrent rendering before the stall completes.  This intentionally
1094bf215546Sopenharmony_ci    * doesn't implement the SKL+ hardware workaround suggesting to enable CS
1095bf215546Sopenharmony_ci    * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
1096bf215546Sopenharmony_ci    * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
1097bf215546Sopenharmony_ci    * already guarantee that there is no concurrent GPGPU kernel execution
1098bf215546Sopenharmony_ci    * (see SKL HSD 2132585).
1099bf215546Sopenharmony_ci    */
1100bf215546Sopenharmony_ci   crocus_emit_pipe_control_flush(batch, "l3 config",
1101bf215546Sopenharmony_ci                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1102bf215546Sopenharmony_ci                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
1103bf215546Sopenharmony_ci                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE |
1104bf215546Sopenharmony_ci                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE);
1105bf215546Sopenharmony_ci
1106bf215546Sopenharmony_ci   /* Now send a third stalling flush to make sure that invalidation is
1107bf215546Sopenharmony_ci    * complete when the L3 configuration registers are modified.
1108bf215546Sopenharmony_ci    */
1109bf215546Sopenharmony_ci   crocus_emit_pipe_control_flush(batch, "l3 config",
1110bf215546Sopenharmony_ci                                  PIPE_CONTROL_DATA_CACHE_FLUSH |
1111bf215546Sopenharmony_ci                                  PIPE_CONTROL_CS_STALL);
1112bf215546Sopenharmony_ci
1113bf215546Sopenharmony_ci#if GFX_VER == 8
1114bf215546Sopenharmony_ci   assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]);
1115bf215546Sopenharmony_ci   crocus_emit_reg(batch, GENX(L3CNTLREG), reg) {
1116bf215546Sopenharmony_ci      reg.SLMEnable = cfg->n[INTEL_L3P_SLM] > 0;
1117bf215546Sopenharmony_ci      reg.URBAllocation = cfg->n[INTEL_L3P_URB];
1118bf215546Sopenharmony_ci      reg.ROAllocation = cfg->n[INTEL_L3P_RO];
1119bf215546Sopenharmony_ci      reg.DCAllocation = cfg->n[INTEL_L3P_DC];
1120bf215546Sopenharmony_ci      reg.AllAllocation = cfg->n[INTEL_L3P_ALL];
1121bf215546Sopenharmony_ci   }
1122bf215546Sopenharmony_ci#else
1123bf215546Sopenharmony_ci   assert(!cfg->n[INTEL_L3P_ALL]);
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_ci   /* When enabled SLM only uses a portion of the L3 on half of the banks,
1126bf215546Sopenharmony_ci    * the matching space on the remaining banks has to be allocated to a
1127bf215546Sopenharmony_ci    * client (URB for all validated configurations) set to the
1128bf215546Sopenharmony_ci    * lower-bandwidth 2-bank address hashing mode.
1129bf215546Sopenharmony_ci    */
1130bf215546Sopenharmony_ci   const bool urb_low_bw = has_slm && devinfo->platform != INTEL_PLATFORM_BYT;
1131bf215546Sopenharmony_ci   assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]);
1132bf215546Sopenharmony_ci
1133bf215546Sopenharmony_ci   /* Minimum number of ways that can be allocated to the URB. */
1134bf215546Sopenharmony_ci   const unsigned n0_urb = (devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0);
1135bf215546Sopenharmony_ci   assert(cfg->n[INTEL_L3P_URB] >= n0_urb);
1136bf215546Sopenharmony_ci
1137bf215546Sopenharmony_ci   uint32_t l3sqcr1, l3cr2, l3cr3;
1138bf215546Sopenharmony_ci
1139bf215546Sopenharmony_ci   crocus_pack_state(GENX(L3SQCREG1), &l3sqcr1, reg) {
1140bf215546Sopenharmony_ci      reg.ConvertDC_UC = !has_dc;
1141bf215546Sopenharmony_ci      reg.ConvertIS_UC = !has_is;
1142bf215546Sopenharmony_ci      reg.ConvertC_UC = !has_c;
1143bf215546Sopenharmony_ci      reg.ConvertT_UC = !has_t;
1144bf215546Sopenharmony_ci#if GFX_VERx10 == 75
1145bf215546Sopenharmony_ci      reg.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT;
1146bf215546Sopenharmony_ci#else
1147bf215546Sopenharmony_ci      reg.L3SQGeneralPriorityCreditInitialization =
1148bf215546Sopenharmony_ci         devinfo->platform == INTEL_PLATFORM_BYT ? BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT;
1149bf215546Sopenharmony_ci#endif
1150bf215546Sopenharmony_ci      reg.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT;
1151bf215546Sopenharmony_ci   };
1152bf215546Sopenharmony_ci
1153bf215546Sopenharmony_ci   crocus_pack_state(GENX(L3CNTLREG2), &l3cr2, reg) {
1154bf215546Sopenharmony_ci      reg.SLMEnable = has_slm;
1155bf215546Sopenharmony_ci      reg.URBLowBandwidth = urb_low_bw;
1156bf215546Sopenharmony_ci      reg.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb;
1157bf215546Sopenharmony_ci#if !(GFX_VERx10 == 75)
1158bf215546Sopenharmony_ci      reg.ALLAllocation = cfg->n[INTEL_L3P_ALL];
1159bf215546Sopenharmony_ci#endif
1160bf215546Sopenharmony_ci      reg.ROAllocation = cfg->n[INTEL_L3P_RO];
1161bf215546Sopenharmony_ci      reg.DCAllocation = cfg->n[INTEL_L3P_DC];
1162bf215546Sopenharmony_ci   };
1163bf215546Sopenharmony_ci
1164bf215546Sopenharmony_ci   crocus_pack_state(GENX(L3CNTLREG3), &l3cr3, reg) {
1165bf215546Sopenharmony_ci      reg.ISAllocation = cfg->n[INTEL_L3P_IS];
1166bf215546Sopenharmony_ci      reg.ISLowBandwidth = 0;
1167bf215546Sopenharmony_ci      reg.CAllocation = cfg->n[INTEL_L3P_C];
1168bf215546Sopenharmony_ci      reg.CLowBandwidth = 0;
1169bf215546Sopenharmony_ci      reg.TAllocation = cfg->n[INTEL_L3P_T];
1170bf215546Sopenharmony_ci      reg.TLowBandwidth = 0;
1171bf215546Sopenharmony_ci   };
1172bf215546Sopenharmony_ci
1173bf215546Sopenharmony_ci   /* Set up the L3 partitioning. */
1174bf215546Sopenharmony_ci   crocus_emit_lri(batch, L3SQCREG1, l3sqcr1);
1175bf215546Sopenharmony_ci   crocus_emit_lri(batch, L3CNTLREG2, l3cr2);
1176bf215546Sopenharmony_ci   crocus_emit_lri(batch, L3CNTLREG3, l3cr3);
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_ci#if GFX_VERSIONx10 == 75
1179bf215546Sopenharmony_ci   /* TODO: Fail screen creation if command parser version < 4 */
1180bf215546Sopenharmony_ci   uint32_t scratch1, chicken3;
1181bf215546Sopenharmony_ci   crocus_pack_state(GENX(SCRATCH1), &scratch1, reg) {
1182bf215546Sopenharmony_ci      reg.L3AtomicDisable = !has_dc;
1183bf215546Sopenharmony_ci   }
1184bf215546Sopenharmony_ci   crocus_pack_state(GENX(CHICKEN3), &chicken3, reg) {
1185bf215546Sopenharmony_ci      reg.L3AtomicDisableMask = true;
1186bf215546Sopenharmony_ci      reg.L3AtomicDisable = !has_dc;
1187bf215546Sopenharmony_ci   }
1188bf215546Sopenharmony_ci   crocus_emit_lri(batch, SCRATCH1, scratch1);
1189bf215546Sopenharmony_ci   crocus_emit_lri(batch, CHICKEN3, chicken3);
1190bf215546Sopenharmony_ci#endif
1191bf215546Sopenharmony_ci#endif
1192bf215546Sopenharmony_ci}
1193bf215546Sopenharmony_ci
1194bf215546Sopenharmony_cistatic void
1195bf215546Sopenharmony_ciemit_l3_state(struct crocus_batch *batch, bool compute)
1196bf215546Sopenharmony_ci{
1197bf215546Sopenharmony_ci   const struct intel_l3_config *const cfg =
1198bf215546Sopenharmony_ci      compute ? batch->screen->l3_config_cs : batch->screen->l3_config_3d;
1199bf215546Sopenharmony_ci
1200bf215546Sopenharmony_ci   setup_l3_config(batch, cfg);
1201bf215546Sopenharmony_ci   if (INTEL_DEBUG(DEBUG_L3)) {
1202bf215546Sopenharmony_ci      intel_dump_l3_config(cfg, stderr);
1203bf215546Sopenharmony_ci   }
1204bf215546Sopenharmony_ci}
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci/**
1207bf215546Sopenharmony_ci * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
1208bf215546Sopenharmony_ci */
1209bf215546Sopenharmony_cistatic void
1210bf215546Sopenharmony_cigen7_emit_cs_stall_flush(struct crocus_batch *batch)
1211bf215546Sopenharmony_ci{
1212bf215546Sopenharmony_ci   crocus_emit_pipe_control_write(batch,
1213bf215546Sopenharmony_ci                                  "workaround",
1214bf215546Sopenharmony_ci                                  PIPE_CONTROL_CS_STALL
1215bf215546Sopenharmony_ci                                  | PIPE_CONTROL_WRITE_IMMEDIATE,
1216bf215546Sopenharmony_ci                                  batch->ice->workaround_bo,
1217bf215546Sopenharmony_ci                                  batch->ice->workaround_offset, 0);
1218bf215546Sopenharmony_ci}
1219bf215546Sopenharmony_ci#endif
1220bf215546Sopenharmony_ci
1221bf215546Sopenharmony_cistatic void
1222bf215546Sopenharmony_ciemit_pipeline_select(struct crocus_batch *batch, uint32_t pipeline)
1223bf215546Sopenharmony_ci{
1224bf215546Sopenharmony_ci#if GFX_VER == 8
1225bf215546Sopenharmony_ci   /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
1226bf215546Sopenharmony_ci    *
1227bf215546Sopenharmony_ci    *   Software must clear the COLOR_CALC_STATE Valid field in
1228bf215546Sopenharmony_ci    *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
1229bf215546Sopenharmony_ci    *   with Pipeline Select set to GPGPU.
1230bf215546Sopenharmony_ci    *
1231bf215546Sopenharmony_ci    * The internal hardware docs recommend the same workaround for Gfx9
1232bf215546Sopenharmony_ci    * hardware too.
1233bf215546Sopenharmony_ci    */
1234bf215546Sopenharmony_ci   if (pipeline == GPGPU)
1235bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
1236bf215546Sopenharmony_ci#endif
1237bf215546Sopenharmony_ci
1238bf215546Sopenharmony_ci#if GFX_VER >= 6
1239bf215546Sopenharmony_ci   /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
1240bf215546Sopenharmony_ci    * PIPELINE_SELECT [DevBWR+]":
1241bf215546Sopenharmony_ci    *
1242bf215546Sopenharmony_ci    *    "Project: DEVSNB+
1243bf215546Sopenharmony_ci    *
1244bf215546Sopenharmony_ci    *     Software must ensure all the write caches are flushed through a
1245bf215546Sopenharmony_ci    *     stalling PIPE_CONTROL command followed by another PIPE_CONTROL
1246bf215546Sopenharmony_ci    *     command to invalidate read only caches prior to programming
1247bf215546Sopenharmony_ci    *     MI_PIPELINE_SELECT command to change the Pipeline Select Mode."
1248bf215546Sopenharmony_ci    */
1249bf215546Sopenharmony_ci   const unsigned dc_flush =
1250bf215546Sopenharmony_ci      GFX_VER >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
1251bf215546Sopenharmony_ci   crocus_emit_pipe_control_flush(batch,
1252bf215546Sopenharmony_ci                                  "workaround: PIPELINE_SELECT flushes (1/2)",
1253bf215546Sopenharmony_ci                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
1254bf215546Sopenharmony_ci                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1255bf215546Sopenharmony_ci                                  dc_flush |
1256bf215546Sopenharmony_ci                                  PIPE_CONTROL_CS_STALL);
1257bf215546Sopenharmony_ci
1258bf215546Sopenharmony_ci   crocus_emit_pipe_control_flush(batch,
1259bf215546Sopenharmony_ci                                  "workaround: PIPELINE_SELECT flushes (2/2)",
1260bf215546Sopenharmony_ci                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1261bf215546Sopenharmony_ci                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
1262bf215546Sopenharmony_ci                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
1263bf215546Sopenharmony_ci                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE);
1264bf215546Sopenharmony_ci#else
1265bf215546Sopenharmony_ci   /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
1266bf215546Sopenharmony_ci    * PIPELINE_SELECT [DevBWR+]":
1267bf215546Sopenharmony_ci    *
1268bf215546Sopenharmony_ci    *   Project: PRE-DEVSNB
1269bf215546Sopenharmony_ci    *
1270bf215546Sopenharmony_ci    *   Software must ensure the current pipeline is flushed via an
1271bf215546Sopenharmony_ci    *   MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
1272bf215546Sopenharmony_ci    */
1273bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MI_FLUSH), foo);
1274bf215546Sopenharmony_ci#endif
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) {
1277bf215546Sopenharmony_ci      sel.PipelineSelection = pipeline;
1278bf215546Sopenharmony_ci   }
1279bf215546Sopenharmony_ci
1280bf215546Sopenharmony_ci#if GFX_VER == 7 && !(GFX_VERx10 == 75)
1281bf215546Sopenharmony_ci   if (pipeline == _3D) {
1282bf215546Sopenharmony_ci      gen7_emit_cs_stall_flush(batch);
1283bf215546Sopenharmony_ci
1284bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
1285bf215546Sopenharmony_ci         prim.PrimitiveTopologyType = _3DPRIM_POINTLIST;
1286bf215546Sopenharmony_ci      };
1287bf215546Sopenharmony_ci   }
1288bf215546Sopenharmony_ci#endif
1289bf215546Sopenharmony_ci}
1290bf215546Sopenharmony_ci
1291bf215546Sopenharmony_ci/**
1292bf215546Sopenharmony_ci * The following diagram shows how we partition the URB:
1293bf215546Sopenharmony_ci *
1294bf215546Sopenharmony_ci *        16kB or 32kB               Rest of the URB space
1295bf215546Sopenharmony_ci *   __________-__________   _________________-_________________
1296bf215546Sopenharmony_ci *  /                     \ /                                   \
1297bf215546Sopenharmony_ci * +-------------------------------------------------------------+
1298bf215546Sopenharmony_ci * |  VS/HS/DS/GS/FS Push  |           VS/HS/DS/GS URB           |
1299bf215546Sopenharmony_ci * |       Constants       |               Entries               |
1300bf215546Sopenharmony_ci * +-------------------------------------------------------------+
1301bf215546Sopenharmony_ci *
1302bf215546Sopenharmony_ci * Notably, push constants must be stored at the beginning of the URB
1303bf215546Sopenharmony_ci * space, while entries can be stored anywhere.  Ivybridge and Haswell
1304bf215546Sopenharmony_ci * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
1305bf215546Sopenharmony_ci * doubles this (32kB).
1306bf215546Sopenharmony_ci *
1307bf215546Sopenharmony_ci * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
1308bf215546Sopenharmony_ci * sized) in increments of 1kB.  Haswell GT3 requires them to be located and
1309bf215546Sopenharmony_ci * sized in increments of 2kB.
1310bf215546Sopenharmony_ci *
1311bf215546Sopenharmony_ci * Currently we split the constant buffer space evenly among whatever stages
1312bf215546Sopenharmony_ci * are active.  This is probably not ideal, but simple.
1313bf215546Sopenharmony_ci *
1314bf215546Sopenharmony_ci * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
1315bf215546Sopenharmony_ci * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
1316bf215546Sopenharmony_ci * Haswell GT3 has 512kB of URB space.
1317bf215546Sopenharmony_ci *
1318bf215546Sopenharmony_ci * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
1319bf215546Sopenharmony_ci * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
1320bf215546Sopenharmony_ci */
#if GFX_VER >= 7
/**
 * Statically partition the push constant area among the shader stages from
 * vertex through fragment.
 *
 * Each stage receives one fifth of devinfo.max_constant_urb_size_kb; the
 * fragment stage additionally receives whatever the integer division left
 * over.
 */
static void
crocus_alloc_push_constants(struct crocus_batch *batch)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   const unsigned total_kb = devinfo->max_constant_urb_size_kb;
   const unsigned stage_kb = total_kb / 5;
   /* The last (fragment) slice absorbs the remainder of the division. */
   const unsigned fragment_kb = total_kb - 4 * stage_kb;

   /* For now, we set a static partitioning of the push constant area,
    * assuming that all stages could be in use.
    *
    * TODO: Try lazily allocating the HS/DS/GS sections as needed, and
    *       see if that improves performance by offering more space to
    *       the VS/FS when those aren't in use.  Also, try dynamically
    *       enabling/disabling it like i965 does.  This would be more
    *       stalls and may not actually help; we don't know yet.
    */
   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
      /* The VS packet layout is reused for every stage; only the
       * sub-opcode (18 + stage) differs.
       */
      crocus_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
         alloc._3DCommandSubOpcode = 18 + stage;
         alloc.ConstantBufferOffset = stage_kb * stage;
         if (stage == MESA_SHADER_FRAGMENT)
            alloc.ConstantBufferSize = fragment_kb;
         else
            alloc.ConstantBufferSize = stage_kb;
      }
   }

   /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
    *
    *     A PIPE_CONTROL command with the CS Stall bit set must be programmed
    *     in the ring after this instruction.
    *
    * No such restriction exists for Haswell or Baytrail.
    */
   if (devinfo->platform == INTEL_PLATFORM_IVB)
      gen7_emit_cs_stall_flush(batch);
}
#endif
1357bf215546Sopenharmony_ci
1358bf215546Sopenharmony_ci/**
1359bf215546Sopenharmony_ci * Upload the initial GPU state for a render context.
1360bf215546Sopenharmony_ci *
1361bf215546Sopenharmony_ci * This sets some invariant state that needs to be programmed a particular
1362bf215546Sopenharmony_ci * way, but we never actually change.
1363bf215546Sopenharmony_ci */
1364bf215546Sopenharmony_cistatic void
1365bf215546Sopenharmony_cicrocus_init_render_context(struct crocus_batch *batch)
1366bf215546Sopenharmony_ci{
1367bf215546Sopenharmony_ci   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
1368bf215546Sopenharmony_ci
1369bf215546Sopenharmony_ci   emit_pipeline_select(batch, _3D);
1370bf215546Sopenharmony_ci
1371bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(STATE_SIP), foo);
1372bf215546Sopenharmony_ci
1373bf215546Sopenharmony_ci#if GFX_VER >= 7
1374bf215546Sopenharmony_ci   emit_l3_state(batch, false);
1375bf215546Sopenharmony_ci#endif
1376bf215546Sopenharmony_ci#if (GFX_VERx10 == 70 || GFX_VERx10 == 80)
1377bf215546Sopenharmony_ci   crocus_emit_reg(batch, GENX(INSTPM), reg) {
1378bf215546Sopenharmony_ci      reg.CONSTANT_BUFFERAddressOffsetDisable = true;
1379bf215546Sopenharmony_ci      reg.CONSTANT_BUFFERAddressOffsetDisableMask = true;
1380bf215546Sopenharmony_ci   }
1381bf215546Sopenharmony_ci#endif
1382bf215546Sopenharmony_ci#if GFX_VER >= 5 || GFX_VERx10 == 45
1383bf215546Sopenharmony_ci   /* Use the legacy AA line coverage computation. */
1384bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);
1385bf215546Sopenharmony_ci#endif
1386bf215546Sopenharmony_ci
1387bf215546Sopenharmony_ci   /* No polygon stippling offsets are necessary. */
1388bf215546Sopenharmony_ci   /* TODO: may need to set an offset for origin-UL framebuffers */
1389bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);
1390bf215546Sopenharmony_ci
1391bf215546Sopenharmony_ci#if GFX_VER >= 7
1392bf215546Sopenharmony_ci   crocus_alloc_push_constants(batch);
1393bf215546Sopenharmony_ci#endif
1394bf215546Sopenharmony_ci
1395bf215546Sopenharmony_ci#if GFX_VER == 8
1396bf215546Sopenharmony_ci   /* Set the initial MSAA sample positions. */
1397bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {
1398bf215546Sopenharmony_ci      INTEL_SAMPLE_POS_1X(pat._1xSample);
1399bf215546Sopenharmony_ci      INTEL_SAMPLE_POS_2X(pat._2xSample);
1400bf215546Sopenharmony_ci      INTEL_SAMPLE_POS_4X(pat._4xSample);
1401bf215546Sopenharmony_ci      INTEL_SAMPLE_POS_8X(pat._8xSample);
1402bf215546Sopenharmony_ci   }
1403bf215546Sopenharmony_ci
1404bf215546Sopenharmony_ci   /* Disable chromakeying (it's for media) */
1405bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);
1406bf215546Sopenharmony_ci
1407bf215546Sopenharmony_ci   /* We want regular rendering, not special HiZ operations. */
1408bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);
1409bf215546Sopenharmony_ci#endif
1410bf215546Sopenharmony_ci}
1411bf215546Sopenharmony_ci
#if GFX_VER >= 7
/**
 * Upload the initial GPU state for a compute context: select the GPGPU
 * pipeline, then program the compute L3 configuration.
 */
static void
crocus_init_compute_context(struct crocus_batch *batch)
{
   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;

   emit_pipeline_select(batch, GPGPU);

   /* This function only exists on GFX_VER >= 7, so the L3 setup needs no
    * additional generation guard.
    */
   emit_l3_state(batch, true);
}
#endif
1425bf215546Sopenharmony_ci
1426bf215546Sopenharmony_ci/**
1427bf215546Sopenharmony_ci * Generation-specific context state (ice->state.genx->...).
1428bf215546Sopenharmony_ci *
1429bf215546Sopenharmony_ci * Most state can go in crocus_context directly, but these encode hardware
1430bf215546Sopenharmony_ci * packets which vary by generation.
1431bf215546Sopenharmony_ci */
1432bf215546Sopenharmony_cistruct crocus_genx_state {
1433bf215546Sopenharmony_ci   struct {
1434bf215546Sopenharmony_ci#if GFX_VER >= 7
1435bf215546Sopenharmony_ci      struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES];
1436bf215546Sopenharmony_ci#endif
1437bf215546Sopenharmony_ci   } shaders[MESA_SHADER_STAGES];
1438bf215546Sopenharmony_ci
1439bf215546Sopenharmony_ci#if GFX_VER == 8
1440bf215546Sopenharmony_ci   bool pma_fix_enabled;
1441bf215546Sopenharmony_ci#endif
1442bf215546Sopenharmony_ci};
1443bf215546Sopenharmony_ci
1444bf215546Sopenharmony_ci/**
1445bf215546Sopenharmony_ci * The pipe->set_blend_color() driver hook.
1446bf215546Sopenharmony_ci *
1447bf215546Sopenharmony_ci * This corresponds to our COLOR_CALC_STATE.
1448bf215546Sopenharmony_ci */
1449bf215546Sopenharmony_cistatic void
1450bf215546Sopenharmony_cicrocus_set_blend_color(struct pipe_context *ctx,
1451bf215546Sopenharmony_ci                       const struct pipe_blend_color *state)
1452bf215546Sopenharmony_ci{
1453bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
1454bf215546Sopenharmony_ci
1455bf215546Sopenharmony_ci   /* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */
1456bf215546Sopenharmony_ci   memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));
1457bf215546Sopenharmony_ci#if GFX_VER <= 5
1458bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN4_CONSTANT_COLOR;
1459bf215546Sopenharmony_ci#else
1460bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
1461bf215546Sopenharmony_ci#endif
1462bf215546Sopenharmony_ci}
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_ci/**
1465bf215546Sopenharmony_ci * Gallium CSO for blend state (see pipe_blend_state).
1466bf215546Sopenharmony_ci */
1467bf215546Sopenharmony_cistruct crocus_blend_state {
1468bf215546Sopenharmony_ci#if GFX_VER == 8
1469bf215546Sopenharmony_ci   /** Partial 3DSTATE_PS_BLEND */
1470bf215546Sopenharmony_ci   uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)];
1471bf215546Sopenharmony_ci#endif
1472bf215546Sopenharmony_ci
1473bf215546Sopenharmony_ci   /** copy of BLEND_STATE */
1474bf215546Sopenharmony_ci   struct pipe_blend_state cso;
1475bf215546Sopenharmony_ci
1476bf215546Sopenharmony_ci   /** Bitfield of whether blending is enabled for RT[i] - for aux resolves */
1477bf215546Sopenharmony_ci   uint8_t blend_enables;
1478bf215546Sopenharmony_ci
1479bf215546Sopenharmony_ci   /** Bitfield of whether color writes are enabled for RT[i] */
1480bf215546Sopenharmony_ci   uint8_t color_write_enables;
1481bf215546Sopenharmony_ci
1482bf215546Sopenharmony_ci   /** Does RT[0] use dual color blending? */
1483bf215546Sopenharmony_ci   bool dual_color_blending;
1484bf215546Sopenharmony_ci};
1485bf215546Sopenharmony_ci
1486bf215546Sopenharmony_cistatic enum pipe_blendfactor
1487bf215546Sopenharmony_cifix_blendfactor(enum pipe_blendfactor f, bool alpha_to_one)
1488bf215546Sopenharmony_ci{
1489bf215546Sopenharmony_ci   if (alpha_to_one) {
1490bf215546Sopenharmony_ci      if (f == PIPE_BLENDFACTOR_SRC1_ALPHA)
1491bf215546Sopenharmony_ci         return PIPE_BLENDFACTOR_ONE;
1492bf215546Sopenharmony_ci
1493bf215546Sopenharmony_ci      if (f == PIPE_BLENDFACTOR_INV_SRC1_ALPHA)
1494bf215546Sopenharmony_ci         return PIPE_BLENDFACTOR_ZERO;
1495bf215546Sopenharmony_ci   }
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci   return f;
1498bf215546Sopenharmony_ci}
1499bf215546Sopenharmony_ci
1500bf215546Sopenharmony_ci#if GFX_VER >= 6
1501bf215546Sopenharmony_citypedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML;
1502bf215546Sopenharmony_ci#else
1503bf215546Sopenharmony_citypedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML;
1504bf215546Sopenharmony_ci#endif
1505bf215546Sopenharmony_ci
1506bf215546Sopenharmony_cistatic bool
1507bf215546Sopenharmony_cican_emit_logic_op(struct crocus_context *ice)
1508bf215546Sopenharmony_ci{
1509bf215546Sopenharmony_ci   /* all pre gen8 have logicop restricted to unorm */
1510bf215546Sopenharmony_ci   enum pipe_format pformat = PIPE_FORMAT_NONE;
1511bf215546Sopenharmony_ci   for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
1512bf215546Sopenharmony_ci      if (ice->state.framebuffer.cbufs[i]) {
1513bf215546Sopenharmony_ci         pformat = ice->state.framebuffer.cbufs[i]->format;
1514bf215546Sopenharmony_ci         break;
1515bf215546Sopenharmony_ci      }
1516bf215546Sopenharmony_ci   }
1517bf215546Sopenharmony_ci   return (pformat == PIPE_FORMAT_NONE || util_format_is_unorm(pformat));
1518bf215546Sopenharmony_ci}
1519bf215546Sopenharmony_ci
1520bf215546Sopenharmony_cistatic bool
1521bf215546Sopenharmony_ciset_blend_entry_bits(struct crocus_batch *batch, BLEND_ENTRY_GENXML *entry,
1522bf215546Sopenharmony_ci                     struct crocus_blend_state *cso_blend,
1523bf215546Sopenharmony_ci                     int idx)
1524bf215546Sopenharmony_ci{
1525bf215546Sopenharmony_ci   struct crocus_context *ice = batch->ice;
1526bf215546Sopenharmony_ci   bool independent_alpha_blend = false;
1527bf215546Sopenharmony_ci   const struct pipe_rt_blend_state *rt =
1528bf215546Sopenharmony_ci      &cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? idx : 0];
1529bf215546Sopenharmony_ci   const unsigned blend_enabled = rt->blend_enable;
1530bf215546Sopenharmony_ci
1531bf215546Sopenharmony_ci   enum pipe_blendfactor src_rgb =
1532bf215546Sopenharmony_ci      fix_blendfactor(rt->rgb_src_factor, cso_blend->cso.alpha_to_one);
1533bf215546Sopenharmony_ci   enum pipe_blendfactor src_alpha =
1534bf215546Sopenharmony_ci      fix_blendfactor(rt->alpha_src_factor, cso_blend->cso.alpha_to_one);
1535bf215546Sopenharmony_ci   enum pipe_blendfactor dst_rgb =
1536bf215546Sopenharmony_ci      fix_blendfactor(rt->rgb_dst_factor, cso_blend->cso.alpha_to_one);
1537bf215546Sopenharmony_ci   enum pipe_blendfactor dst_alpha =
1538bf215546Sopenharmony_ci      fix_blendfactor(rt->alpha_dst_factor, cso_blend->cso.alpha_to_one);
1539bf215546Sopenharmony_ci
1540bf215546Sopenharmony_ci   if (rt->rgb_func != rt->alpha_func ||
1541bf215546Sopenharmony_ci       src_rgb != src_alpha || dst_rgb != dst_alpha)
1542bf215546Sopenharmony_ci      independent_alpha_blend = true;
1543bf215546Sopenharmony_ci   if (cso_blend->cso.logicop_enable) {
1544bf215546Sopenharmony_ci      if (GFX_VER >= 8 || can_emit_logic_op(ice)) {
1545bf215546Sopenharmony_ci         entry->LogicOpEnable = cso_blend->cso.logicop_enable;
1546bf215546Sopenharmony_ci         entry->LogicOpFunction = cso_blend->cso.logicop_func;
1547bf215546Sopenharmony_ci      }
1548bf215546Sopenharmony_ci   } else if (blend_enabled) {
1549bf215546Sopenharmony_ci      if (idx == 0) {
1550bf215546Sopenharmony_ci         struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
1551bf215546Sopenharmony_ci         struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
1552bf215546Sopenharmony_ci         entry->ColorBufferBlendEnable =
1553bf215546Sopenharmony_ci            (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend);
1554bf215546Sopenharmony_ci      } else
1555bf215546Sopenharmony_ci         entry->ColorBufferBlendEnable = 1;
1556bf215546Sopenharmony_ci
1557bf215546Sopenharmony_ci      entry->ColorBlendFunction          = rt->rgb_func;
1558bf215546Sopenharmony_ci      entry->AlphaBlendFunction          = rt->alpha_func;
1559bf215546Sopenharmony_ci      entry->SourceBlendFactor           = (int) src_rgb;
1560bf215546Sopenharmony_ci      entry->SourceAlphaBlendFactor      = (int) src_alpha;
1561bf215546Sopenharmony_ci      entry->DestinationBlendFactor      = (int) dst_rgb;
1562bf215546Sopenharmony_ci      entry->DestinationAlphaBlendFactor = (int) dst_alpha;
1563bf215546Sopenharmony_ci   }
1564bf215546Sopenharmony_ci#if GFX_VER <= 5
1565bf215546Sopenharmony_ci   /*
1566bf215546Sopenharmony_ci    * Gen4/GM45/ILK can't handle have ColorBufferBlendEnable == 0
1567bf215546Sopenharmony_ci    * when a dual src blend shader is in use. Setup dummy blending.
1568bf215546Sopenharmony_ci    */
1569bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
1570bf215546Sopenharmony_ci   struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
1571bf215546Sopenharmony_ci   if (idx == 0 && !blend_enabled && wm_prog_data->dual_src_blend) {
1572bf215546Sopenharmony_ci      entry->ColorBufferBlendEnable = 1;
1573bf215546Sopenharmony_ci      entry->ColorBlendFunction = PIPE_BLEND_ADD;
1574bf215546Sopenharmony_ci      entry->AlphaBlendFunction = PIPE_BLEND_ADD;
1575bf215546Sopenharmony_ci      entry->SourceBlendFactor = PIPE_BLENDFACTOR_ONE;
1576bf215546Sopenharmony_ci      entry->SourceAlphaBlendFactor = PIPE_BLENDFACTOR_ONE;
1577bf215546Sopenharmony_ci      entry->DestinationBlendFactor = PIPE_BLENDFACTOR_ZERO;
1578bf215546Sopenharmony_ci      entry->DestinationAlphaBlendFactor = PIPE_BLENDFACTOR_ZERO;
1579bf215546Sopenharmony_ci   }
1580bf215546Sopenharmony_ci#endif
1581bf215546Sopenharmony_ci   return independent_alpha_blend;
1582bf215546Sopenharmony_ci}
1583bf215546Sopenharmony_ci
1584bf215546Sopenharmony_ci/**
1585bf215546Sopenharmony_ci * The pipe->create_blend_state() driver hook.
1586bf215546Sopenharmony_ci *
1587bf215546Sopenharmony_ci * Translates a pipe_blend_state into crocus_blend_state.
1588bf215546Sopenharmony_ci */
1589bf215546Sopenharmony_cistatic void *
1590bf215546Sopenharmony_cicrocus_create_blend_state(struct pipe_context *ctx,
1591bf215546Sopenharmony_ci                          const struct pipe_blend_state *state)
1592bf215546Sopenharmony_ci{
1593bf215546Sopenharmony_ci   struct crocus_blend_state *cso = malloc(sizeof(struct crocus_blend_state));
1594bf215546Sopenharmony_ci
1595bf215546Sopenharmony_ci   cso->blend_enables = 0;
1596bf215546Sopenharmony_ci   cso->color_write_enables = 0;
1597bf215546Sopenharmony_ci   STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS <= 8);
1598bf215546Sopenharmony_ci
1599bf215546Sopenharmony_ci   cso->cso = *state;
1600bf215546Sopenharmony_ci   cso->dual_color_blending = util_blend_state_is_dual(state, 0);
1601bf215546Sopenharmony_ci
1602bf215546Sopenharmony_ci#if GFX_VER == 8
1603bf215546Sopenharmony_ci   bool indep_alpha_blend = false;
1604bf215546Sopenharmony_ci#endif
1605bf215546Sopenharmony_ci   for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
1606bf215546Sopenharmony_ci      const struct pipe_rt_blend_state *rt =
1607bf215546Sopenharmony_ci         &state->rt[state->independent_blend_enable ? i : 0];
1608bf215546Sopenharmony_ci      if (rt->blend_enable)
1609bf215546Sopenharmony_ci         cso->blend_enables |= 1u << i;
1610bf215546Sopenharmony_ci      if (rt->colormask)
1611bf215546Sopenharmony_ci         cso->color_write_enables |= 1u << i;
1612bf215546Sopenharmony_ci#if GFX_VER == 8
1613bf215546Sopenharmony_ci      enum pipe_blendfactor src_rgb =
1614bf215546Sopenharmony_ci         fix_blendfactor(rt->rgb_src_factor, state->alpha_to_one);
1615bf215546Sopenharmony_ci      enum pipe_blendfactor src_alpha =
1616bf215546Sopenharmony_ci         fix_blendfactor(rt->alpha_src_factor, state->alpha_to_one);
1617bf215546Sopenharmony_ci      enum pipe_blendfactor dst_rgb =
1618bf215546Sopenharmony_ci         fix_blendfactor(rt->rgb_dst_factor, state->alpha_to_one);
1619bf215546Sopenharmony_ci      enum pipe_blendfactor dst_alpha =
1620bf215546Sopenharmony_ci         fix_blendfactor(rt->alpha_dst_factor, state->alpha_to_one);
1621bf215546Sopenharmony_ci
1622bf215546Sopenharmony_ci      if (rt->rgb_func != rt->alpha_func ||
1623bf215546Sopenharmony_ci          src_rgb != src_alpha || dst_rgb != dst_alpha)
1624bf215546Sopenharmony_ci         indep_alpha_blend = true;
1625bf215546Sopenharmony_ci#endif
1626bf215546Sopenharmony_ci   }
1627bf215546Sopenharmony_ci
1628bf215546Sopenharmony_ci#if GFX_VER == 8
1629bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {
1630bf215546Sopenharmony_ci      /* pb.HasWriteableRT is filled in at draw time.
1631bf215546Sopenharmony_ci       * pb.AlphaTestEnable is filled in at draw time.
1632bf215546Sopenharmony_ci       *
1633bf215546Sopenharmony_ci       * pb.ColorBufferBlendEnable is filled in at draw time so we can avoid
1634bf215546Sopenharmony_ci       * setting it when dual color blending without an appropriate shader.
1635bf215546Sopenharmony_ci       */
1636bf215546Sopenharmony_ci
1637bf215546Sopenharmony_ci      pb.AlphaToCoverageEnable = state->alpha_to_coverage;
1638bf215546Sopenharmony_ci      pb.IndependentAlphaBlendEnable = indep_alpha_blend;
1639bf215546Sopenharmony_ci
1640bf215546Sopenharmony_ci      /* The casts prevent warnings about implicit enum type conversions. */
1641bf215546Sopenharmony_ci      pb.SourceBlendFactor =
1642bf215546Sopenharmony_ci         (int) fix_blendfactor(state->rt[0].rgb_src_factor, state->alpha_to_one);
1643bf215546Sopenharmony_ci      pb.SourceAlphaBlendFactor =
1644bf215546Sopenharmony_ci         (int) fix_blendfactor(state->rt[0].alpha_src_factor, state->alpha_to_one);
1645bf215546Sopenharmony_ci      pb.DestinationBlendFactor =
1646bf215546Sopenharmony_ci         (int) fix_blendfactor(state->rt[0].rgb_dst_factor, state->alpha_to_one);
1647bf215546Sopenharmony_ci      pb.DestinationAlphaBlendFactor =
1648bf215546Sopenharmony_ci         (int) fix_blendfactor(state->rt[0].alpha_dst_factor, state->alpha_to_one);
1649bf215546Sopenharmony_ci   }
1650bf215546Sopenharmony_ci#endif
1651bf215546Sopenharmony_ci   return cso;
1652bf215546Sopenharmony_ci}
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci/**
1655bf215546Sopenharmony_ci * The pipe->bind_blend_state() driver hook.
1656bf215546Sopenharmony_ci *
1657bf215546Sopenharmony_ci * Bind a blending CSO and flag related dirty bits.
1658bf215546Sopenharmony_ci */
1659bf215546Sopenharmony_cistatic void
1660bf215546Sopenharmony_cicrocus_bind_blend_state(struct pipe_context *ctx, void *state)
1661bf215546Sopenharmony_ci{
1662bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
1663bf215546Sopenharmony_ci   struct crocus_blend_state *cso = state;
1664bf215546Sopenharmony_ci
1665bf215546Sopenharmony_ci   ice->state.cso_blend = cso;
1666bf215546Sopenharmony_ci   ice->state.blend_enables = cso ? cso->blend_enables : 0;
1667bf215546Sopenharmony_ci
1668bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS;
1669bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_WM;
1670bf215546Sopenharmony_ci#if GFX_VER >= 6
1671bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
1672bf215546Sopenharmony_ci#endif
1673bf215546Sopenharmony_ci#if GFX_VER >= 7
1674bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS;
1675bf215546Sopenharmony_ci#endif
1676bf215546Sopenharmony_ci#if GFX_VER == 8
1677bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
1678bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;
1679bf215546Sopenharmony_ci#endif
1680bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
1681bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
1682bf215546Sopenharmony_ci   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_BLEND];
1683bf215546Sopenharmony_ci}
1684bf215546Sopenharmony_ci
1685bf215546Sopenharmony_ci/**
1686bf215546Sopenharmony_ci * Return true if the FS writes to any color outputs which are not disabled
1687bf215546Sopenharmony_ci * via color masking.
1688bf215546Sopenharmony_ci */
1689bf215546Sopenharmony_cistatic bool
1690bf215546Sopenharmony_cihas_writeable_rt(const struct crocus_blend_state *cso_blend,
1691bf215546Sopenharmony_ci                 const struct shader_info *fs_info)
1692bf215546Sopenharmony_ci{
1693bf215546Sopenharmony_ci   if (!fs_info)
1694bf215546Sopenharmony_ci      return false;
1695bf215546Sopenharmony_ci
1696bf215546Sopenharmony_ci   unsigned rt_outputs = fs_info->outputs_written >> FRAG_RESULT_DATA0;
1697bf215546Sopenharmony_ci
1698bf215546Sopenharmony_ci   if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
1699bf215546Sopenharmony_ci      rt_outputs = (1 << BRW_MAX_DRAW_BUFFERS) - 1;
1700bf215546Sopenharmony_ci
1701bf215546Sopenharmony_ci   return cso_blend->color_write_enables & rt_outputs;
1702bf215546Sopenharmony_ci}
1703bf215546Sopenharmony_ci
1704bf215546Sopenharmony_ci/**
1705bf215546Sopenharmony_ci * Gallium CSO for depth, stencil, and alpha testing state.
1706bf215546Sopenharmony_ci */
1707bf215546Sopenharmony_cistruct crocus_depth_stencil_alpha_state {
1708bf215546Sopenharmony_ci   struct pipe_depth_stencil_alpha_state cso;
1709bf215546Sopenharmony_ci
1710bf215546Sopenharmony_ci   bool depth_writes_enabled;
1711bf215546Sopenharmony_ci   bool stencil_writes_enabled;
1712bf215546Sopenharmony_ci};
1713bf215546Sopenharmony_ci
1714bf215546Sopenharmony_ci/**
1715bf215546Sopenharmony_ci * The pipe->create_depth_stencil_alpha_state() driver hook.
1716bf215546Sopenharmony_ci *
1717bf215546Sopenharmony_ci * We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha
1718bf215546Sopenharmony_ci * testing state since we need pieces of it in a variety of places.
1719bf215546Sopenharmony_ci */
1720bf215546Sopenharmony_cistatic void *
1721bf215546Sopenharmony_cicrocus_create_zsa_state(struct pipe_context *ctx,
1722bf215546Sopenharmony_ci                        const struct pipe_depth_stencil_alpha_state *state)
1723bf215546Sopenharmony_ci{
1724bf215546Sopenharmony_ci   struct crocus_depth_stencil_alpha_state *cso =
1725bf215546Sopenharmony_ci      malloc(sizeof(struct crocus_depth_stencil_alpha_state));
1726bf215546Sopenharmony_ci
1727bf215546Sopenharmony_ci   bool two_sided_stencil = state->stencil[1].enabled;
1728bf215546Sopenharmony_ci   cso->cso = *state;
1729bf215546Sopenharmony_ci
1730bf215546Sopenharmony_ci   cso->depth_writes_enabled = state->depth_writemask;
1731bf215546Sopenharmony_ci   cso->stencil_writes_enabled =
1732bf215546Sopenharmony_ci      state->stencil[0].writemask != 0 ||
1733bf215546Sopenharmony_ci      (two_sided_stencil && state->stencil[1].writemask != 0);
1734bf215546Sopenharmony_ci
1735bf215546Sopenharmony_ci   /* The state tracker needs to optimize away EQUAL writes for us. */
1736bf215546Sopenharmony_ci   assert(!(state->depth_func == PIPE_FUNC_EQUAL && state->depth_writemask));
1737bf215546Sopenharmony_ci
1738bf215546Sopenharmony_ci   return cso;
1739bf215546Sopenharmony_ci}
1740bf215546Sopenharmony_ci
1741bf215546Sopenharmony_ci/**
1742bf215546Sopenharmony_ci * The pipe->bind_depth_stencil_alpha_state() driver hook.
1743bf215546Sopenharmony_ci *
1744bf215546Sopenharmony_ci * Bind a depth/stencil/alpha CSO and flag related dirty bits.
1745bf215546Sopenharmony_ci */
1746bf215546Sopenharmony_cistatic void
1747bf215546Sopenharmony_cicrocus_bind_zsa_state(struct pipe_context *ctx, void *state)
1748bf215546Sopenharmony_ci{
1749bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
1750bf215546Sopenharmony_ci   struct crocus_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
1751bf215546Sopenharmony_ci   struct crocus_depth_stencil_alpha_state *new_cso = state;
1752bf215546Sopenharmony_ci
1753bf215546Sopenharmony_ci   if (new_cso) {
1754bf215546Sopenharmony_ci      if (cso_changed(cso.alpha_ref_value))
1755bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
1756bf215546Sopenharmony_ci
1757bf215546Sopenharmony_ci      if (cso_changed(cso.alpha_enabled))
1758bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_WM;
1759bf215546Sopenharmony_ci#if GFX_VER >= 6
1760bf215546Sopenharmony_ci      if (cso_changed(cso.alpha_enabled))
1761bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
1762bf215546Sopenharmony_ci
1763bf215546Sopenharmony_ci      if (cso_changed(cso.alpha_func))
1764bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
1765bf215546Sopenharmony_ci#endif
1766bf215546Sopenharmony_ci#if GFX_VER == 8
1767bf215546Sopenharmony_ci      if (cso_changed(cso.alpha_enabled))
1768bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;
1769bf215546Sopenharmony_ci#endif
1770bf215546Sopenharmony_ci
1771bf215546Sopenharmony_ci      if (cso_changed(depth_writes_enabled))
1772bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
1773bf215546Sopenharmony_ci
1774bf215546Sopenharmony_ci      ice->state.depth_writes_enabled = new_cso->depth_writes_enabled;
1775bf215546Sopenharmony_ci      ice->state.stencil_writes_enabled = new_cso->stencil_writes_enabled;
1776bf215546Sopenharmony_ci
1777bf215546Sopenharmony_ci#if GFX_VER <= 5
1778bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
1779bf215546Sopenharmony_ci#endif
1780bf215546Sopenharmony_ci   }
1781bf215546Sopenharmony_ci
1782bf215546Sopenharmony_ci   ice->state.cso_zsa = new_cso;
1783bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;
1784bf215546Sopenharmony_ci#if GFX_VER >= 6
1785bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL;
1786bf215546Sopenharmony_ci#endif
1787bf215546Sopenharmony_ci#if GFX_VER == 8
1788bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
1789bf215546Sopenharmony_ci#endif
1790bf215546Sopenharmony_ci   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_DEPTH_STENCIL_ALPHA];
1791bf215546Sopenharmony_ci}
1792bf215546Sopenharmony_ci
#if GFX_VER == 8
/**
 * Decide whether the Gfx8 depth PMA fix should be enabled for the currently
 * bound framebuffer, fragment shader, blend and depth/stencil/alpha state.
 *
 * @return true when the state combination matches the Z_PMA_OPT equation
 *         described below.
 */
static bool
want_pma_fix(struct crocus_context *ice)
{
   UNUSED struct crocus_screen *screen = (void *) ice->ctx.screen;
   UNUSED const struct intel_device_info *devinfo = &screen->devinfo;
   const struct brw_wm_prog_data *fs_data = (void *)
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
   const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
   const struct crocus_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;
   const struct crocus_blend_state *blend = ice->state.cso_blend;

   /* In very specific combinations of state, we can instruct Gfx8-9 hardware
    * to avoid stalling at the pixel mask array.  The state equations are
    * documented in these places:
    *
    * - Gfx8 Depth PMA Fix:   CACHE_MODE_1::NP_PMA_FIX_ENABLE
    * - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
    *
    * Both equations share some common elements:
    *
    *    no_hiz_op =
    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *
    *    killpixels =
    *       3DSTATE_WM::ForceKillPix != ForceOff &&
    *       (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *        3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *        3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *        3DSTATE_PS_BLEND::AlphaTestEnable ||
    *        3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    *    (Technically the stencil PMA treats ForceKillPix differently,
    *     but I think this is a documentation oversight, and we don't
    *     ever use it in this way, so it doesn't matter).
    *
    *    common_pma_fix =
    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
    *       3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 &&
    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    *       3DSTATE_WM::EDSC_Mode != EDSC_PREPS &&
    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
    *       no_hiz_op
    *
    * These are always true:
    *
    *    3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0
    *    3DSTATE_PS_EXTRA::PixelShaderValid
    *
    * Also, we never use the normal drawing path for HiZ ops; these are true:
    *
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    *
    * This happens sometimes:
    *
    *    3DSTATE_WM::ForceThreadDispatch != 1
    *
    * However, we choose to ignore it as it either agrees with the signal
    * (dispatch was already enabled, so nothing out of the ordinary), or
    * there are no framebuffer attachments (so no depth or HiZ anyway,
    * meaning the PMA signal will already be disabled).
    */

   /* No depth/stencil attachment at all: nothing to optimize. */
   if (!fb->zsbuf)
      return false;

   struct crocus_resource *depth_res, *stencil_res;
   crocus_get_depth_stencil_resources(devinfo, fb->zsbuf->texture,
                                      &depth_res, &stencil_res);

   /* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    */
   if (!depth_res ||
       !crocus_resource_level_has_hiz(depth_res, fb->zsbuf->u.tex.level))
      return false;

   /* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */
   if (fs_data->early_fragment_tests)
      return false;

   /* 3DSTATE_WM::ForceKillPix != ForceOff &&
    * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    */
   const bool kills_pixels =
      fs_data->uses_kill ||
      fs_data->uses_omask ||
      blend->cso.alpha_to_coverage ||
      zsa->cso.alpha_enabled;

   /* The Gfx8 depth PMA equation becomes:
    *
    *    depth_writes =
    *       3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *       3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE
    *
    *    stencil_writes =
    *       3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *       3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *       3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE
    *
    *    Z_PMA_OPT =
    *       common_pma_fix &&
    *       3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable &&
    *       ((killpixels && (depth_writes || stencil_writes)) ||
    *        3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF)
    *
    */
   if (!zsa->cso.depth_enabled)
      return false;

   /* A shader-computed depth is sufficient on its own. */
   if (fs_data->computed_depth_mode != PSCDEPTH_OFF)
      return true;

   /* Otherwise we need killpixels plus a depth or stencil write. */
   if (!kills_pixels)
      return false;

   if (zsa->depth_writes_enabled)
      return true;

   return stencil_res && zsa->stencil_writes_enabled;
}
#endif
1916bf215546Sopenharmony_civoid
1917bf215546Sopenharmony_cigenX(crocus_update_pma_fix)(struct crocus_context *ice,
1918bf215546Sopenharmony_ci                            struct crocus_batch *batch,
1919bf215546Sopenharmony_ci                            bool enable)
1920bf215546Sopenharmony_ci{
1921bf215546Sopenharmony_ci#if GFX_VER == 8
1922bf215546Sopenharmony_ci   struct crocus_genx_state *genx = ice->state.genx;
1923bf215546Sopenharmony_ci
1924bf215546Sopenharmony_ci   if (genx->pma_fix_enabled == enable)
1925bf215546Sopenharmony_ci      return;
1926bf215546Sopenharmony_ci
1927bf215546Sopenharmony_ci   genx->pma_fix_enabled = enable;
1928bf215546Sopenharmony_ci
1929bf215546Sopenharmony_ci   /* According to the Broadwell PIPE_CONTROL documentation, software should
1930bf215546Sopenharmony_ci    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
1931bf215546Sopenharmony_ci    * prior to the LRI.  If stencil buffer writes are enabled, then a Render        * Cache Flush is also necessary.
1932bf215546Sopenharmony_ci    *
1933bf215546Sopenharmony_ci    * The Gfx9 docs say to use a depth stall rather than a command streamer
1934bf215546Sopenharmony_ci    * stall.  However, the hardware seems to violently disagree.  A full
1935bf215546Sopenharmony_ci    * command streamer stall seems to be needed in both cases.
1936bf215546Sopenharmony_ci    */
1937bf215546Sopenharmony_ci   crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
1938bf215546Sopenharmony_ci                                  PIPE_CONTROL_CS_STALL |
1939bf215546Sopenharmony_ci                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1940bf215546Sopenharmony_ci                                  PIPE_CONTROL_RENDER_TARGET_FLUSH);
1941bf215546Sopenharmony_ci
1942bf215546Sopenharmony_ci   crocus_emit_reg(batch, GENX(CACHE_MODE_1), reg) {
1943bf215546Sopenharmony_ci      reg.NPPMAFixEnable = enable;
1944bf215546Sopenharmony_ci      reg.NPEarlyZFailsDisable = enable;
1945bf215546Sopenharmony_ci      reg.NPPMAFixEnableMask = true;
1946bf215546Sopenharmony_ci      reg.NPEarlyZFailsDisableMask = true;
1947bf215546Sopenharmony_ci   }
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_ci   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
1950bf215546Sopenharmony_ci    * Flush bits is often necessary.  We do it regardless because it's easier.
1951bf215546Sopenharmony_ci    * The render cache flush is also necessary if stencil writes are enabled.
1952bf215546Sopenharmony_ci    *
1953bf215546Sopenharmony_ci    * Again, the Gfx9 docs give a different set of flushes but the Broadwell
1954bf215546Sopenharmony_ci    * flushes seem to work just as well.
1955bf215546Sopenharmony_ci    */
1956bf215546Sopenharmony_ci   crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
1957bf215546Sopenharmony_ci                                  PIPE_CONTROL_DEPTH_STALL |
1958bf215546Sopenharmony_ci                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1959bf215546Sopenharmony_ci                                  PIPE_CONTROL_RENDER_TARGET_FLUSH);
1960bf215546Sopenharmony_ci#endif
1961bf215546Sopenharmony_ci}
1962bf215546Sopenharmony_ci
1963bf215546Sopenharmony_cistatic float
1964bf215546Sopenharmony_ciget_line_width(const struct pipe_rasterizer_state *state)
1965bf215546Sopenharmony_ci{
1966bf215546Sopenharmony_ci   float line_width = state->line_width;
1967bf215546Sopenharmony_ci
1968bf215546Sopenharmony_ci   /* From the OpenGL 4.4 spec:
1969bf215546Sopenharmony_ci    *
1970bf215546Sopenharmony_ci    * "The actual width of non-antialiased lines is determined by rounding
1971bf215546Sopenharmony_ci    *  the supplied width to the nearest integer, then clamping it to the
1972bf215546Sopenharmony_ci    *  implementation-dependent maximum non-antialiased line width."
1973bf215546Sopenharmony_ci    */
1974bf215546Sopenharmony_ci   if (!state->multisample && !state->line_smooth)
1975bf215546Sopenharmony_ci      line_width = roundf(state->line_width);
1976bf215546Sopenharmony_ci
1977bf215546Sopenharmony_ci   if (!state->multisample && state->line_smooth && line_width < 1.5f) {
1978bf215546Sopenharmony_ci      /* For 1 pixel line thickness or less, the general anti-aliasing
1979bf215546Sopenharmony_ci       * algorithm gives up, and a garbage line is generated.  Setting a
1980bf215546Sopenharmony_ci       * Line Width of 0.0 specifies the rasterization of the "thinnest"
1981bf215546Sopenharmony_ci       * (one-pixel-wide), non-antialiased lines.
1982bf215546Sopenharmony_ci       *
1983bf215546Sopenharmony_ci       * Lines rendered with zero Line Width are rasterized using the
1984bf215546Sopenharmony_ci       * "Grid Intersection Quantization" rules as specified by the
1985bf215546Sopenharmony_ci       * "Zero-Width (Cosmetic) Line Rasterization" section of the docs.
1986bf215546Sopenharmony_ci       */
1987bf215546Sopenharmony_ci      /* hack around this for gfx4/5 fps counters in hud. */
1988bf215546Sopenharmony_ci      line_width = GFX_VER < 6 ? 1.5f : 0.0f;
1989bf215546Sopenharmony_ci   }
1990bf215546Sopenharmony_ci   return line_width;
1991bf215546Sopenharmony_ci}
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci/**
1994bf215546Sopenharmony_ci * The pipe->create_rasterizer_state() driver hook.
1995bf215546Sopenharmony_ci */
1996bf215546Sopenharmony_cistatic void *
1997bf215546Sopenharmony_cicrocus_create_rasterizer_state(struct pipe_context *ctx,
1998bf215546Sopenharmony_ci                               const struct pipe_rasterizer_state *state)
1999bf215546Sopenharmony_ci{
2000bf215546Sopenharmony_ci   struct crocus_rasterizer_state *cso =
2001bf215546Sopenharmony_ci      malloc(sizeof(struct crocus_rasterizer_state));
2002bf215546Sopenharmony_ci
2003bf215546Sopenharmony_ci   cso->fill_mode_point_or_line =
2004bf215546Sopenharmony_ci      state->fill_front == PIPE_POLYGON_MODE_LINE ||
2005bf215546Sopenharmony_ci      state->fill_front == PIPE_POLYGON_MODE_POINT ||
2006bf215546Sopenharmony_ci      state->fill_back == PIPE_POLYGON_MODE_LINE ||
2007bf215546Sopenharmony_ci      state->fill_back == PIPE_POLYGON_MODE_POINT;
2008bf215546Sopenharmony_ci
2009bf215546Sopenharmony_ci   if (state->clip_plane_enable != 0)
2010bf215546Sopenharmony_ci      cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1;
2011bf215546Sopenharmony_ci   else
2012bf215546Sopenharmony_ci      cso->num_clip_plane_consts = 0;
2013bf215546Sopenharmony_ci
2014bf215546Sopenharmony_ci   cso->cso = *state;
2015bf215546Sopenharmony_ci
2016bf215546Sopenharmony_ci#if GFX_VER >= 6
2017bf215546Sopenharmony_ci   float line_width = get_line_width(state);
2018bf215546Sopenharmony_ci
2019bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
2020bf215546Sopenharmony_ci      sf.StatisticsEnable = true;
2021bf215546Sopenharmony_ci      sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
2022bf215546Sopenharmony_ci      sf.LineEndCapAntialiasingRegionWidth =
2023bf215546Sopenharmony_ci         state->line_smooth ? _10pixels : _05pixels;
2024bf215546Sopenharmony_ci      sf.LastPixelEnable = state->line_last_pixel;
2025bf215546Sopenharmony_ci#if GFX_VER <= 7
2026bf215546Sopenharmony_ci      sf.AntialiasingEnable = state->line_smooth;
2027bf215546Sopenharmony_ci#endif
2028bf215546Sopenharmony_ci#if GFX_VER == 8
2029bf215546Sopenharmony_ci      struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
2030bf215546Sopenharmony_ci      if (screen->devinfo.platform == INTEL_PLATFORM_CHV)
2031bf215546Sopenharmony_ci         sf.CHVLineWidth = line_width;
2032bf215546Sopenharmony_ci      else
2033bf215546Sopenharmony_ci         sf.LineWidth = line_width;
2034bf215546Sopenharmony_ci#else
2035bf215546Sopenharmony_ci      sf.LineWidth = line_width;
2036bf215546Sopenharmony_ci#endif
2037bf215546Sopenharmony_ci      sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
2038bf215546Sopenharmony_ci      sf.PointWidth = state->point_size;
2039bf215546Sopenharmony_ci
2040bf215546Sopenharmony_ci      if (state->flatshade_first) {
2041bf215546Sopenharmony_ci         sf.TriangleFanProvokingVertexSelect = 1;
2042bf215546Sopenharmony_ci      } else {
2043bf215546Sopenharmony_ci         sf.TriangleStripListProvokingVertexSelect = 2;
2044bf215546Sopenharmony_ci         sf.TriangleFanProvokingVertexSelect = 2;
2045bf215546Sopenharmony_ci         sf.LineStripListProvokingVertexSelect = 1;
2046bf215546Sopenharmony_ci      }
2047bf215546Sopenharmony_ci
2048bf215546Sopenharmony_ci#if GFX_VER == 6
2049bf215546Sopenharmony_ci      sf.AttributeSwizzleEnable = true;
2050bf215546Sopenharmony_ci      if (state->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
2051bf215546Sopenharmony_ci         sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
2052bf215546Sopenharmony_ci      else
2053bf215546Sopenharmony_ci         sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
2054bf215546Sopenharmony_ci#endif
2055bf215546Sopenharmony_ci
2056bf215546Sopenharmony_ci#if GFX_VER <= 7
2057bf215546Sopenharmony_ci      sf.FrontWinding = state->front_ccw ? 1 : 0; // Or the other way...
2058bf215546Sopenharmony_ci
2059bf215546Sopenharmony_ci#if GFX_VER >= 6
2060bf215546Sopenharmony_ci      sf.GlobalDepthOffsetEnableSolid = state->offset_tri;
2061bf215546Sopenharmony_ci      sf.GlobalDepthOffsetEnableWireframe = state->offset_line;
2062bf215546Sopenharmony_ci      sf.GlobalDepthOffsetEnablePoint = state->offset_point;
2063bf215546Sopenharmony_ci      sf.GlobalDepthOffsetConstant = state->offset_units * 2;
2064bf215546Sopenharmony_ci      sf.GlobalDepthOffsetScale = state->offset_scale;
2065bf215546Sopenharmony_ci      sf.GlobalDepthOffsetClamp = state->offset_clamp;
2066bf215546Sopenharmony_ci
2067bf215546Sopenharmony_ci      sf.FrontFaceFillMode = translate_fill_mode(state->fill_front);
2068bf215546Sopenharmony_ci      sf.BackFaceFillMode = translate_fill_mode(state->fill_back);
2069bf215546Sopenharmony_ci#endif
2070bf215546Sopenharmony_ci
2071bf215546Sopenharmony_ci      sf.CullMode = translate_cull_mode(state->cull_face);
2072bf215546Sopenharmony_ci      sf.ScissorRectangleEnable = true;
2073bf215546Sopenharmony_ci
2074bf215546Sopenharmony_ci#if GFX_VERx10 == 75
2075bf215546Sopenharmony_ci      sf.LineStippleEnable = state->line_stipple_enable;
2076bf215546Sopenharmony_ci#endif
2077bf215546Sopenharmony_ci#endif
2078bf215546Sopenharmony_ci   }
2079bf215546Sopenharmony_ci#endif
2080bf215546Sopenharmony_ci
2081bf215546Sopenharmony_ci#if GFX_VER == 8
2082bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
2083bf215546Sopenharmony_ci      rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
2084bf215546Sopenharmony_ci      rr.CullMode = translate_cull_mode(state->cull_face);
2085bf215546Sopenharmony_ci      rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
2086bf215546Sopenharmony_ci      rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
2087bf215546Sopenharmony_ci      rr.DXMultisampleRasterizationEnable = state->multisample;
2088bf215546Sopenharmony_ci      rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
2089bf215546Sopenharmony_ci      rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
2090bf215546Sopenharmony_ci      rr.GlobalDepthOffsetEnablePoint = state->offset_point;
2091bf215546Sopenharmony_ci      rr.GlobalDepthOffsetConstant = state->offset_units * 2;
2092bf215546Sopenharmony_ci      rr.GlobalDepthOffsetScale = state->offset_scale;
2093bf215546Sopenharmony_ci      rr.GlobalDepthOffsetClamp = state->offset_clamp;
2094bf215546Sopenharmony_ci      rr.SmoothPointEnable = state->point_smooth;
2095bf215546Sopenharmony_ci      rr.AntialiasingEnable = state->line_smooth;
2096bf215546Sopenharmony_ci      rr.ScissorRectangleEnable = state->scissor;
2097bf215546Sopenharmony_ci      rr.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);
2098bf215546Sopenharmony_ci   }
2099bf215546Sopenharmony_ci#endif
2100bf215546Sopenharmony_ci
2101bf215546Sopenharmony_ci#if GFX_VER >= 6
2102bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
2103bf215546Sopenharmony_ci      /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from
2104bf215546Sopenharmony_ci       * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.
2105bf215546Sopenharmony_ci       */
2106bf215546Sopenharmony_ci#if GFX_VER >= 7
2107bf215546Sopenharmony_ci      cl.EarlyCullEnable = true;
2108bf215546Sopenharmony_ci#endif
2109bf215546Sopenharmony_ci
2110bf215546Sopenharmony_ci#if GFX_VER == 7
2111bf215546Sopenharmony_ci      cl.FrontWinding = state->front_ccw ? 1 : 0;
2112bf215546Sopenharmony_ci      cl.CullMode = translate_cull_mode(state->cull_face);
2113bf215546Sopenharmony_ci#endif
2114bf215546Sopenharmony_ci      cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
2115bf215546Sopenharmony_ci#if GFX_VER < 8
2116bf215546Sopenharmony_ci      cl.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);
2117bf215546Sopenharmony_ci#endif
2118bf215546Sopenharmony_ci      cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
2119bf215546Sopenharmony_ci      cl.GuardbandClipTestEnable = true;
2120bf215546Sopenharmony_ci      cl.ClipEnable = true;
2121bf215546Sopenharmony_ci      cl.MinimumPointWidth = 0.125;
2122bf215546Sopenharmony_ci      cl.MaximumPointWidth = 255.875;
2123bf215546Sopenharmony_ci
2124bf215546Sopenharmony_ci#if GFX_VER == 8
2125bf215546Sopenharmony_ci      cl.ForceUserClipDistanceClipTestEnableBitmask = true;
2126bf215546Sopenharmony_ci#endif
2127bf215546Sopenharmony_ci
2128bf215546Sopenharmony_ci      if (state->flatshade_first) {
2129bf215546Sopenharmony_ci         cl.TriangleFanProvokingVertexSelect = 1;
2130bf215546Sopenharmony_ci      } else {
2131bf215546Sopenharmony_ci         cl.TriangleStripListProvokingVertexSelect = 2;
2132bf215546Sopenharmony_ci         cl.TriangleFanProvokingVertexSelect = 2;
2133bf215546Sopenharmony_ci         cl.LineStripListProvokingVertexSelect = 1;
2134bf215546Sopenharmony_ci      }
2135bf215546Sopenharmony_ci   }
2136bf215546Sopenharmony_ci#endif
2137bf215546Sopenharmony_ci
2138bf215546Sopenharmony_ci   /* Remap from 0..255 back to 1..256 */
2139bf215546Sopenharmony_ci   const unsigned line_stipple_factor = state->line_stipple_factor + 1;
2140bf215546Sopenharmony_ci
2141bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
2142bf215546Sopenharmony_ci      if (state->line_stipple_enable) {
2143bf215546Sopenharmony_ci         line.LineStipplePattern = state->line_stipple_pattern;
2144bf215546Sopenharmony_ci         line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
2145bf215546Sopenharmony_ci         line.LineStippleRepeatCount = line_stipple_factor;
2146bf215546Sopenharmony_ci      }
2147bf215546Sopenharmony_ci   }
2148bf215546Sopenharmony_ci
2149bf215546Sopenharmony_ci   return cso;
2150bf215546Sopenharmony_ci}
2151bf215546Sopenharmony_ci
2152bf215546Sopenharmony_ci/**
2153bf215546Sopenharmony_ci * The pipe->bind_rasterizer_state() driver hook.
2154bf215546Sopenharmony_ci *
2155bf215546Sopenharmony_ci * Bind a rasterizer CSO and flag related dirty bits.
2156bf215546Sopenharmony_ci */
2157bf215546Sopenharmony_cistatic void
2158bf215546Sopenharmony_cicrocus_bind_rasterizer_state(struct pipe_context *ctx, void *state)
2159bf215546Sopenharmony_ci{
2160bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
2161bf215546Sopenharmony_ci   struct crocus_rasterizer_state *old_cso = ice->state.cso_rast;
2162bf215546Sopenharmony_ci   struct crocus_rasterizer_state *new_cso = state;
2163bf215546Sopenharmony_ci
2164bf215546Sopenharmony_ci   if (new_cso) {
2165bf215546Sopenharmony_ci      /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
2166bf215546Sopenharmony_ci      if (cso_changed_memcmp(line_stipple))
2167bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_LINE_STIPPLE;
2168bf215546Sopenharmony_ci#if GFX_VER >= 6
2169bf215546Sopenharmony_ci      if (cso_changed(cso.half_pixel_center))
2170bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE;
2171bf215546Sopenharmony_ci      if (cso_changed(cso.scissor))
2172bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
2173bf215546Sopenharmony_ci      if (cso_changed(cso.multisample))
2174bf215546Sopenharmony_ci	 ice->state.dirty |= CROCUS_DIRTY_WM;
2175bf215546Sopenharmony_ci#else
2176bf215546Sopenharmony_ci      if (cso_changed(cso.scissor))
2177bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;
2178bf215546Sopenharmony_ci#endif
2179bf215546Sopenharmony_ci
2180bf215546Sopenharmony_ci      if (cso_changed(cso.line_stipple_enable) || cso_changed(cso.poly_stipple_enable))
2181bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_WM;
2182bf215546Sopenharmony_ci
2183bf215546Sopenharmony_ci#if GFX_VER >= 6
2184bf215546Sopenharmony_ci      if (cso_changed(cso.rasterizer_discard))
2185bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
2186bf215546Sopenharmony_ci
2187bf215546Sopenharmony_ci      if (cso_changed(cso.flatshade_first))
2188bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;
2189bf215546Sopenharmony_ci#endif
2190bf215546Sopenharmony_ci
2191bf215546Sopenharmony_ci      if (cso_changed(cso.depth_clip_near) || cso_changed(cso.depth_clip_far) ||
2192bf215546Sopenharmony_ci          cso_changed(cso.clip_halfz))
2193bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;
2194bf215546Sopenharmony_ci
2195bf215546Sopenharmony_ci#if GFX_VER >= 7
2196bf215546Sopenharmony_ci      if (cso_changed(cso.sprite_coord_enable) ||
2197bf215546Sopenharmony_ci          cso_changed(cso.sprite_coord_mode) ||
2198bf215546Sopenharmony_ci          cso_changed(cso.light_twoside))
2199bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
2200bf215546Sopenharmony_ci#endif
2201bf215546Sopenharmony_ci#if GFX_VER <= 5
2202bf215546Sopenharmony_ci      if (cso_changed(cso.clip_plane_enable))
2203bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;
2204bf215546Sopenharmony_ci#endif
2205bf215546Sopenharmony_ci   }
2206bf215546Sopenharmony_ci
2207bf215546Sopenharmony_ci   ice->state.cso_rast = new_cso;
2208bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_RASTER;
2209bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_CLIP;
2210bf215546Sopenharmony_ci#if GFX_VER <= 5
2211bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;
2212bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_WM;
2213bf215546Sopenharmony_ci#endif
2214bf215546Sopenharmony_ci#if GFX_VER <= 6
2215bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
2216bf215546Sopenharmony_ci#endif
2217bf215546Sopenharmony_ci   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_RASTERIZER];
2218bf215546Sopenharmony_ci}
2219bf215546Sopenharmony_ci
2220bf215546Sopenharmony_ci/**
2221bf215546Sopenharmony_ci * Return true if the given wrap mode requires the border color to exist.
2222bf215546Sopenharmony_ci *
2223bf215546Sopenharmony_ci * (We can skip uploading it if the sampler isn't going to use it.)
2224bf215546Sopenharmony_ci */
2225bf215546Sopenharmony_cistatic bool
2226bf215546Sopenharmony_ciwrap_mode_needs_border_color(unsigned wrap_mode)
2227bf215546Sopenharmony_ci{
2228bf215546Sopenharmony_ci#if GFX_VER == 8
2229bf215546Sopenharmony_ci   return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
2230bf215546Sopenharmony_ci#else
2231bf215546Sopenharmony_ci   return wrap_mode == TCM_CLAMP_BORDER;
2232bf215546Sopenharmony_ci#endif
2233bf215546Sopenharmony_ci}
2234bf215546Sopenharmony_ci
/**
 * Gallium CSO for sampler state.
 *
 * Filled in by crocus_create_sampler_state(); the hardware SAMPLER_STATE
 * itself is packed later, by crocus_upload_sampler_state().
 */
struct crocus_sampler_state {
   /* Verbatim copy of the Gallium sampler state this CSO was created from. */
   struct pipe_sampler_state pstate;
   /* Copy of pstate's border color. */
   union pipe_color_union border_color;
   /* True if any of wrap_s/wrap_t/wrap_r samples the border color. */
   bool needs_border_color;
   /* Wrap modes as returned by translate_wrap() for each coordinate. */
   unsigned wrap_s;
   unsigned wrap_t;
   unsigned wrap_r;
   /* Magnification filter to program; normally pstate.mag_img_filter, but
    * replaced by the min filter when mipmapping is off and min_lod > 0.
    */
   unsigned mag_img_filter;
   /* Minimum LOD to program; forced to 0 in the same case as above. */
   float min_lod;
};
2248bf215546Sopenharmony_ci
2249bf215546Sopenharmony_ci/**
2250bf215546Sopenharmony_ci * The pipe->create_sampler_state() driver hook.
2251bf215546Sopenharmony_ci *
2252bf215546Sopenharmony_ci * We fill out SAMPLER_STATE (except for the border color pointer), and
2253bf215546Sopenharmony_ci * store that on the CPU.  It doesn't make sense to upload it to a GPU
2254bf215546Sopenharmony_ci * buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires
2255bf215546Sopenharmony_ci * all bound sampler states to be in contiguous memor.
2256bf215546Sopenharmony_ci */
2257bf215546Sopenharmony_cistatic void *
2258bf215546Sopenharmony_cicrocus_create_sampler_state(struct pipe_context *ctx,
2259bf215546Sopenharmony_ci                            const struct pipe_sampler_state *state)
2260bf215546Sopenharmony_ci{
2261bf215546Sopenharmony_ci   struct crocus_sampler_state *cso = CALLOC_STRUCT(crocus_sampler_state);
2262bf215546Sopenharmony_ci
2263bf215546Sopenharmony_ci   if (!cso)
2264bf215546Sopenharmony_ci      return NULL;
2265bf215546Sopenharmony_ci
2266bf215546Sopenharmony_ci   STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
2267bf215546Sopenharmony_ci   STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);
2268bf215546Sopenharmony_ci
2269bf215546Sopenharmony_ci   bool either_nearest = state->min_img_filter == PIPE_TEX_FILTER_NEAREST ||
2270bf215546Sopenharmony_ci      state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
2271bf215546Sopenharmony_ci   cso->wrap_s = translate_wrap(state->wrap_s, either_nearest);
2272bf215546Sopenharmony_ci   cso->wrap_t = translate_wrap(state->wrap_t, either_nearest);
2273bf215546Sopenharmony_ci   cso->wrap_r = translate_wrap(state->wrap_r, either_nearest);
2274bf215546Sopenharmony_ci
2275bf215546Sopenharmony_ci   cso->pstate = *state;
2276bf215546Sopenharmony_ci
2277bf215546Sopenharmony_ci   memcpy(&cso->border_color, &state->border_color, sizeof(cso->border_color));
2278bf215546Sopenharmony_ci
2279bf215546Sopenharmony_ci   cso->needs_border_color = wrap_mode_needs_border_color(cso->wrap_s) ||
2280bf215546Sopenharmony_ci                             wrap_mode_needs_border_color(cso->wrap_t) ||
2281bf215546Sopenharmony_ci                             wrap_mode_needs_border_color(cso->wrap_r);
2282bf215546Sopenharmony_ci
2283bf215546Sopenharmony_ci   cso->min_lod = state->min_lod;
2284bf215546Sopenharmony_ci   cso->mag_img_filter = state->mag_img_filter;
2285bf215546Sopenharmony_ci
2286bf215546Sopenharmony_ci   // XXX: explain this code ported from ilo...I don't get it at all...
2287bf215546Sopenharmony_ci   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2288bf215546Sopenharmony_ci       state->min_lod > 0.0f) {
2289bf215546Sopenharmony_ci      cso->min_lod = 0.0f;
2290bf215546Sopenharmony_ci      cso->mag_img_filter = state->min_img_filter;
2291bf215546Sopenharmony_ci   }
2292bf215546Sopenharmony_ci
2293bf215546Sopenharmony_ci   return cso;
2294bf215546Sopenharmony_ci}
2295bf215546Sopenharmony_ci
2296bf215546Sopenharmony_ci/**
2297bf215546Sopenharmony_ci * The pipe->bind_sampler_states() driver hook.
2298bf215546Sopenharmony_ci */
2299bf215546Sopenharmony_cistatic void
2300bf215546Sopenharmony_cicrocus_bind_sampler_states(struct pipe_context *ctx,
2301bf215546Sopenharmony_ci                           enum pipe_shader_type p_stage,
2302bf215546Sopenharmony_ci                           unsigned start, unsigned count,
2303bf215546Sopenharmony_ci                           void **states)
2304bf215546Sopenharmony_ci{
2305bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
2306bf215546Sopenharmony_ci   gl_shader_stage stage = stage_from_pipe(p_stage);
2307bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
2308bf215546Sopenharmony_ci
2309bf215546Sopenharmony_ci   assert(start + count <= CROCUS_MAX_TEXTURE_SAMPLERS);
2310bf215546Sopenharmony_ci
2311bf215546Sopenharmony_ci   bool dirty = false;
2312bf215546Sopenharmony_ci
2313bf215546Sopenharmony_ci   for (int i = 0; i < count; i++) {
2314bf215546Sopenharmony_ci      if (shs->samplers[start + i] != states[i]) {
2315bf215546Sopenharmony_ci         shs->samplers[start + i] = states[i];
2316bf215546Sopenharmony_ci         dirty = true;
2317bf215546Sopenharmony_ci      }
2318bf215546Sopenharmony_ci   }
2319bf215546Sopenharmony_ci
2320bf215546Sopenharmony_ci   if (dirty) {
2321bf215546Sopenharmony_ci#if GFX_VER <= 5
2322bf215546Sopenharmony_ci      if (p_stage == PIPE_SHADER_FRAGMENT)
2323bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_WM;
2324bf215546Sopenharmony_ci      else if (p_stage == PIPE_SHADER_VERTEX)
2325bf215546Sopenharmony_ci         ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;
2326bf215546Sopenharmony_ci#endif
2327bf215546Sopenharmony_ci      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2328bf215546Sopenharmony_ci      ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES];
2329bf215546Sopenharmony_ci   }
2330bf215546Sopenharmony_ci}
2331bf215546Sopenharmony_ci
/**
 * Wrap-mode overrides applied by crocus_upload_sampler_state() on top of
 * the wrap modes stored in the sampler CSO.
 */
enum samp_workaround {
   /* No override; use the CSO's wrap modes unchanged. */
   SAMP_NORMAL,
   /* Force all three coordinates to TCM_CLAMP. */
   SAMP_CUBE_CLAMP,
   /* Force all three coordinates to TCM_CUBE. */
   SAMP_CUBE_CUBE,
   /* Force only the T coordinate to TCM_WRAP. */
   SAMP_T_WRAP,
};
2338bf215546Sopenharmony_ci
2339bf215546Sopenharmony_cistatic void
2340bf215546Sopenharmony_cicrocus_upload_sampler_state(struct crocus_batch *batch,
2341bf215546Sopenharmony_ci                            struct crocus_sampler_state *cso,
2342bf215546Sopenharmony_ci                            uint32_t border_color_offset,
2343bf215546Sopenharmony_ci                            enum samp_workaround samp_workaround,
2344bf215546Sopenharmony_ci                            uint32_t first_level,
2345bf215546Sopenharmony_ci                            void *map)
2346bf215546Sopenharmony_ci{
2347bf215546Sopenharmony_ci   struct pipe_sampler_state *state = &cso->pstate;
2348bf215546Sopenharmony_ci   uint32_t wrap_s, wrap_t, wrap_r;
2349bf215546Sopenharmony_ci
2350bf215546Sopenharmony_ci   wrap_s = cso->wrap_s;
2351bf215546Sopenharmony_ci   wrap_t = cso->wrap_t;
2352bf215546Sopenharmony_ci   wrap_r = cso->wrap_r;
2353bf215546Sopenharmony_ci
2354bf215546Sopenharmony_ci   switch (samp_workaround) {
2355bf215546Sopenharmony_ci   case SAMP_CUBE_CLAMP:
2356bf215546Sopenharmony_ci      wrap_s = TCM_CLAMP;
2357bf215546Sopenharmony_ci      wrap_t = TCM_CLAMP;
2358bf215546Sopenharmony_ci      wrap_r = TCM_CLAMP;
2359bf215546Sopenharmony_ci      break;
2360bf215546Sopenharmony_ci   case SAMP_CUBE_CUBE:
2361bf215546Sopenharmony_ci      wrap_s = TCM_CUBE;
2362bf215546Sopenharmony_ci      wrap_t = TCM_CUBE;
2363bf215546Sopenharmony_ci      wrap_r = TCM_CUBE;
2364bf215546Sopenharmony_ci      break;
2365bf215546Sopenharmony_ci   case SAMP_T_WRAP:
2366bf215546Sopenharmony_ci      wrap_t = TCM_WRAP;
2367bf215546Sopenharmony_ci      break;
2368bf215546Sopenharmony_ci   default:
2369bf215546Sopenharmony_ci      break;
2370bf215546Sopenharmony_ci   }
2371bf215546Sopenharmony_ci
2372bf215546Sopenharmony_ci   _crocus_pack_state(batch, GENX(SAMPLER_STATE), map, samp) {
2373bf215546Sopenharmony_ci      samp.TCXAddressControlMode = wrap_s;
2374bf215546Sopenharmony_ci      samp.TCYAddressControlMode = wrap_t;
2375bf215546Sopenharmony_ci      samp.TCZAddressControlMode = wrap_r;
2376bf215546Sopenharmony_ci
2377bf215546Sopenharmony_ci#if GFX_VER >= 6
2378bf215546Sopenharmony_ci      samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
2379bf215546Sopenharmony_ci#endif
2380bf215546Sopenharmony_ci      samp.MinModeFilter = state->min_img_filter;
2381bf215546Sopenharmony_ci      samp.MagModeFilter = cso->mag_img_filter;
2382bf215546Sopenharmony_ci      samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
2383bf215546Sopenharmony_ci      samp.MaximumAnisotropy = RATIO21;
2384bf215546Sopenharmony_ci
2385bf215546Sopenharmony_ci      if (state->max_anisotropy >= 2) {
2386bf215546Sopenharmony_ci         if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
2387bf215546Sopenharmony_ci            samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
2388bf215546Sopenharmony_ci#if GFX_VER >= 7
2389bf215546Sopenharmony_ci            samp.AnisotropicAlgorithm = EWAApproximation;
2390bf215546Sopenharmony_ci#endif
2391bf215546Sopenharmony_ci         }
2392bf215546Sopenharmony_ci
2393bf215546Sopenharmony_ci         if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
2394bf215546Sopenharmony_ci            samp.MagModeFilter = MAPFILTER_ANISOTROPIC;
2395bf215546Sopenharmony_ci
2396bf215546Sopenharmony_ci         samp.MaximumAnisotropy =
2397bf215546Sopenharmony_ci            MIN2((state->max_anisotropy - 2) / 2, RATIO161);
2398bf215546Sopenharmony_ci      }
2399bf215546Sopenharmony_ci
2400bf215546Sopenharmony_ci      /* Set address rounding bits if not using nearest filtering. */
2401bf215546Sopenharmony_ci      if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
2402bf215546Sopenharmony_ci         samp.UAddressMinFilterRoundingEnable = true;
2403bf215546Sopenharmony_ci         samp.VAddressMinFilterRoundingEnable = true;
2404bf215546Sopenharmony_ci         samp.RAddressMinFilterRoundingEnable = true;
2405bf215546Sopenharmony_ci      }
2406bf215546Sopenharmony_ci
2407bf215546Sopenharmony_ci      if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
2408bf215546Sopenharmony_ci         samp.UAddressMagFilterRoundingEnable = true;
2409bf215546Sopenharmony_ci         samp.VAddressMagFilterRoundingEnable = true;
2410bf215546Sopenharmony_ci         samp.RAddressMagFilterRoundingEnable = true;
2411bf215546Sopenharmony_ci      }
2412bf215546Sopenharmony_ci
2413bf215546Sopenharmony_ci      if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
2414bf215546Sopenharmony_ci         samp.ShadowFunction = translate_shadow_func(state->compare_func);
2415bf215546Sopenharmony_ci
2416bf215546Sopenharmony_ci      const float hw_max_lod = GFX_VER >= 7 ? 14 : 13;
2417bf215546Sopenharmony_ci
2418bf215546Sopenharmony_ci#if GFX_VER == 8
2419bf215546Sopenharmony_ci      samp.LODPreClampMode = CLAMP_MODE_OGL;
2420bf215546Sopenharmony_ci#else
2421bf215546Sopenharmony_ci      samp.LODPreClampEnable = true;
2422bf215546Sopenharmony_ci#endif
2423bf215546Sopenharmony_ci      samp.MinLOD = CLAMP(cso->min_lod, 0, hw_max_lod);
2424bf215546Sopenharmony_ci      samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
2425bf215546Sopenharmony_ci      samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);
2426bf215546Sopenharmony_ci
2427bf215546Sopenharmony_ci#if GFX_VER == 6
2428bf215546Sopenharmony_ci      samp.BaseMipLevel = CLAMP(first_level, 0, hw_max_lod);
2429bf215546Sopenharmony_ci      samp.MinandMagStateNotEqual = samp.MinModeFilter != samp.MagModeFilter;
2430bf215546Sopenharmony_ci#endif
2431bf215546Sopenharmony_ci
2432bf215546Sopenharmony_ci#if GFX_VER < 6
2433bf215546Sopenharmony_ci      samp.BorderColorPointer =
2434bf215546Sopenharmony_ci         ro_bo(batch->state.bo, border_color_offset);
2435bf215546Sopenharmony_ci#else
2436bf215546Sopenharmony_ci      samp.BorderColorPointer = border_color_offset;
2437bf215546Sopenharmony_ci#endif
2438bf215546Sopenharmony_ci   }
2439bf215546Sopenharmony_ci}
2440bf215546Sopenharmony_ci
2441bf215546Sopenharmony_cistatic void
2442bf215546Sopenharmony_cicrocus_upload_border_color(struct crocus_batch *batch,
2443bf215546Sopenharmony_ci                           struct crocus_sampler_state *cso,
2444bf215546Sopenharmony_ci                           struct crocus_sampler_view *tex,
2445bf215546Sopenharmony_ci                           uint32_t *bc_offset)
2446bf215546Sopenharmony_ci{
2447bf215546Sopenharmony_ci   /* We may need to swizzle the border color for format faking.
2448bf215546Sopenharmony_ci    * A/LA formats are faked as R/RG with 000R or R00G swizzles.
2449bf215546Sopenharmony_ci    * This means we need to move the border color's A channel into
2450bf215546Sopenharmony_ci    * the R or G channels so that those read swizzles will move it
2451bf215546Sopenharmony_ci    * back into A.
2452bf215546Sopenharmony_ci    */
2453bf215546Sopenharmony_ci   enum pipe_format internal_format = PIPE_FORMAT_NONE;
2454bf215546Sopenharmony_ci   union pipe_color_union *color = &cso->border_color;
2455bf215546Sopenharmony_ci   union pipe_color_union tmp;
2456bf215546Sopenharmony_ci   if (tex) {
2457bf215546Sopenharmony_ci      internal_format = tex->res->internal_format;
2458bf215546Sopenharmony_ci
2459bf215546Sopenharmony_ci      if (util_format_is_alpha(internal_format)) {
2460bf215546Sopenharmony_ci         unsigned char swz[4] = {
2461bf215546Sopenharmony_ci            PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
2462bf215546Sopenharmony_ci            PIPE_SWIZZLE_0, PIPE_SWIZZLE_W,
2463bf215546Sopenharmony_ci         };
2464bf215546Sopenharmony_ci         util_format_apply_color_swizzle(&tmp, color, swz, true);
2465bf215546Sopenharmony_ci         color = &tmp;
2466bf215546Sopenharmony_ci      } else if (util_format_is_luminance_alpha(internal_format) &&
2467bf215546Sopenharmony_ci                 internal_format != PIPE_FORMAT_L8A8_SRGB) {
2468bf215546Sopenharmony_ci         unsigned char swz[4] = {
2469bf215546Sopenharmony_ci            PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
2470bf215546Sopenharmony_ci            PIPE_SWIZZLE_X, PIPE_SWIZZLE_W
2471bf215546Sopenharmony_ci         };
2472bf215546Sopenharmony_ci         util_format_apply_color_swizzle(&tmp, color, swz, true);
2473bf215546Sopenharmony_ci         color = &tmp;
2474bf215546Sopenharmony_ci      }
2475bf215546Sopenharmony_ci   }
2476bf215546Sopenharmony_ci   bool is_integer_format = util_format_is_pure_integer(internal_format);
2477bf215546Sopenharmony_ci   unsigned sbc_size = GENX(SAMPLER_BORDER_COLOR_STATE_length) * 4;
2478bf215546Sopenharmony_ci   const int sbc_align = (GFX_VER == 8 ? 64 : ((GFX_VERx10 == 75 && is_integer_format) ? 512 : 32));
2479bf215546Sopenharmony_ci   uint32_t *sbc = stream_state(batch, sbc_size, sbc_align, bc_offset);
2480bf215546Sopenharmony_ci
2481bf215546Sopenharmony_ci   struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 };
2482bf215546Sopenharmony_ci
2483bf215546Sopenharmony_ci#define ASSIGN(dst, src)                        \
2484bf215546Sopenharmony_ci   do {                                         \
2485bf215546Sopenharmony_ci      dst = src;                                \
2486bf215546Sopenharmony_ci   } while (0)
2487bf215546Sopenharmony_ci
2488bf215546Sopenharmony_ci#define ASSIGNu16(dst, src)                     \
2489bf215546Sopenharmony_ci   do {                                         \
2490bf215546Sopenharmony_ci      dst = (uint16_t)src;                      \
2491bf215546Sopenharmony_ci   } while (0)
2492bf215546Sopenharmony_ci
2493bf215546Sopenharmony_ci#define ASSIGNu8(dst, src)                      \
2494bf215546Sopenharmony_ci   do {                                         \
2495bf215546Sopenharmony_ci      dst = (uint8_t)src;                       \
2496bf215546Sopenharmony_ci   } while (0)
2497bf215546Sopenharmony_ci
2498bf215546Sopenharmony_ci#define BORDER_COLOR_ATTR(macro, _color_type, src)              \
2499bf215546Sopenharmony_ci   macro(state.BorderColor ## _color_type ## Red, src[0]);      \
2500bf215546Sopenharmony_ci   macro(state.BorderColor ## _color_type ## Green, src[1]);    \
2501bf215546Sopenharmony_ci   macro(state.BorderColor ## _color_type ## Blue, src[2]);     \
2502bf215546Sopenharmony_ci   macro(state.BorderColor ## _color_type ## Alpha, src[3]);
2503bf215546Sopenharmony_ci
2504bf215546Sopenharmony_ci#if GFX_VER >= 8
2505bf215546Sopenharmony_ci   /* On Broadwell, the border color is represented as four 32-bit floats,
2506bf215546Sopenharmony_ci    * integers, or unsigned values, interpreted according to the surface
2507bf215546Sopenharmony_ci    * format.  This matches the sampler->BorderColor union exactly; just
2508bf215546Sopenharmony_ci    * memcpy the values.
2509bf215546Sopenharmony_ci    */
2510bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(ASSIGN, 32bit, color->ui);
2511bf215546Sopenharmony_ci#elif GFX_VERx10 == 75
2512bf215546Sopenharmony_ci   if (is_integer_format) {
2513bf215546Sopenharmony_ci      const struct util_format_description *format_desc =
2514bf215546Sopenharmony_ci         util_format_description(internal_format);
2515bf215546Sopenharmony_ci
2516bf215546Sopenharmony_ci      /* From the Haswell PRM, "Command Reference: Structures", Page 36:
2517bf215546Sopenharmony_ci       * "If any color channel is missing from the surface format,
2518bf215546Sopenharmony_ci       *  corresponding border color should be programmed as zero and if
2519bf215546Sopenharmony_ci       *  alpha channel is missing, corresponding Alpha border color should
2520bf215546Sopenharmony_ci       *  be programmed as 1."
2521bf215546Sopenharmony_ci       */
2522bf215546Sopenharmony_ci      unsigned c[4] = { 0, 0, 0, 1 };
2523bf215546Sopenharmony_ci      for (int i = 0; i < 4; i++) {
2524bf215546Sopenharmony_ci         if (format_desc->channel[i].size)
2525bf215546Sopenharmony_ci            c[i] = color->ui[i];
2526bf215546Sopenharmony_ci      }
2527bf215546Sopenharmony_ci
2528bf215546Sopenharmony_ci      switch (format_desc->channel[0].size) {
2529bf215546Sopenharmony_ci      case 8:
2530bf215546Sopenharmony_ci         /* Copy RGBA in order. */
2531bf215546Sopenharmony_ci         BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c);
2532bf215546Sopenharmony_ci         break;
2533bf215546Sopenharmony_ci      case 10:
2534bf215546Sopenharmony_ci         /* R10G10B10A2_UINT is treated like a 16-bit format. */
2535bf215546Sopenharmony_ci      case 16:
2536bf215546Sopenharmony_ci         BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c);
2537bf215546Sopenharmony_ci         break;
2538bf215546Sopenharmony_ci      case 32:
2539bf215546Sopenharmony_ci         if (format_desc->channel[1].size && !format_desc->channel[2].size) {
2540bf215546Sopenharmony_ci            /* Careful inspection of the tables reveals that for RG32 formats,
2541bf215546Sopenharmony_ci             * the green channel needs to go where blue normally belongs.
2542bf215546Sopenharmony_ci             */
2543bf215546Sopenharmony_ci            state.BorderColor32bitRed = c[0];
2544bf215546Sopenharmony_ci            state.BorderColor32bitBlue = c[1];
2545bf215546Sopenharmony_ci            state.BorderColor32bitAlpha = 1;
2546bf215546Sopenharmony_ci         } else {
2547bf215546Sopenharmony_ci            /* Copy RGBA in order. */
2548bf215546Sopenharmony_ci            BORDER_COLOR_ATTR(ASSIGN, 32bit, c);
2549bf215546Sopenharmony_ci         }
2550bf215546Sopenharmony_ci         break;
2551bf215546Sopenharmony_ci      default:
2552bf215546Sopenharmony_ci         assert(!"Invalid number of bits per channel in integer format.");
2553bf215546Sopenharmony_ci         break;
2554bf215546Sopenharmony_ci      }
2555bf215546Sopenharmony_ci   } else {
2556bf215546Sopenharmony_ci      BORDER_COLOR_ATTR(ASSIGN, Float, color->f);
2557bf215546Sopenharmony_ci   }
2558bf215546Sopenharmony_ci#elif GFX_VER == 5 || GFX_VER == 6
2559bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color->f);
2560bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color->f);
2561bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color->f);
2562bf215546Sopenharmony_ci
2563bf215546Sopenharmony_ci#define MESA_FLOAT_TO_HALF(dst, src)            \
2564bf215546Sopenharmony_ci   dst = _mesa_float_to_half(src);
2565bf215546Sopenharmony_ci
2566bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color->f);
2567bf215546Sopenharmony_ci
2568bf215546Sopenharmony_ci#undef MESA_FLOAT_TO_HALF
2569bf215546Sopenharmony_ci
2570bf215546Sopenharmony_ci   state.BorderColorSnorm8Red   = state.BorderColorSnorm16Red >> 8;
2571bf215546Sopenharmony_ci   state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8;
2572bf215546Sopenharmony_ci   state.BorderColorSnorm8Blue  = state.BorderColorSnorm16Blue >> 8;
2573bf215546Sopenharmony_ci   state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8;
2574bf215546Sopenharmony_ci
2575bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(ASSIGN, Float, color->f);
2576bf215546Sopenharmony_ci
2577bf215546Sopenharmony_ci#elif GFX_VER == 4
2578bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(ASSIGN, , color->f);
2579bf215546Sopenharmony_ci#else
2580bf215546Sopenharmony_ci   BORDER_COLOR_ATTR(ASSIGN, Float, color->f);
2581bf215546Sopenharmony_ci#endif
2582bf215546Sopenharmony_ci
2583bf215546Sopenharmony_ci#undef ASSIGN
2584bf215546Sopenharmony_ci#undef BORDER_COLOR_ATTR
2585bf215546Sopenharmony_ci
2586bf215546Sopenharmony_ci   GENX(SAMPLER_BORDER_COLOR_STATE_pack)(batch, sbc, &state);
2587bf215546Sopenharmony_ci}
2588bf215546Sopenharmony_ci
2589bf215546Sopenharmony_ci/**
2590bf215546Sopenharmony_ci * Upload the sampler states into a contiguous area of GPU memory, for
2591bf215546Sopenharmony_ci * for 3DSTATE_SAMPLER_STATE_POINTERS_*.
2592bf215546Sopenharmony_ci *
2593bf215546Sopenharmony_ci * Also fill out the border color state pointers.
2594bf215546Sopenharmony_ci */
2595bf215546Sopenharmony_cistatic void
2596bf215546Sopenharmony_cicrocus_upload_sampler_states(struct crocus_context *ice,
2597bf215546Sopenharmony_ci                             struct crocus_batch *batch, gl_shader_stage stage)
2598bf215546Sopenharmony_ci{
2599bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
2600bf215546Sopenharmony_ci   const struct shader_info *info = crocus_get_shader_info(ice, stage);
2601bf215546Sopenharmony_ci
2602bf215546Sopenharmony_ci   /* We assume the state tracker will call pipe->bind_sampler_states()
2603bf215546Sopenharmony_ci    * if the program's number of textures changes.
2604bf215546Sopenharmony_ci    */
2605bf215546Sopenharmony_ci   unsigned count = info ? BITSET_LAST_BIT(info->textures_used) : 0;
2606bf215546Sopenharmony_ci
2607bf215546Sopenharmony_ci   if (!count)
2608bf215546Sopenharmony_ci      return;
2609bf215546Sopenharmony_ci
2610bf215546Sopenharmony_ci   /* Assemble the SAMPLER_STATEs into a contiguous table that lives
2611bf215546Sopenharmony_ci    * in the dynamic state memory zone, so we can point to it via the
2612bf215546Sopenharmony_ci    * 3DSTATE_SAMPLER_STATE_POINTERS_* commands.
2613bf215546Sopenharmony_ci    */
2614bf215546Sopenharmony_ci   unsigned size = count * 4 * GENX(SAMPLER_STATE_length);
2615bf215546Sopenharmony_ci   uint32_t *map = stream_state(batch, size, 32, &shs->sampler_offset);
2616bf215546Sopenharmony_ci
2617bf215546Sopenharmony_ci   if (unlikely(!map))
2618bf215546Sopenharmony_ci      return;
2619bf215546Sopenharmony_ci
2620bf215546Sopenharmony_ci   for (int i = 0; i < count; i++) {
2621bf215546Sopenharmony_ci      struct crocus_sampler_state *state = shs->samplers[i];
2622bf215546Sopenharmony_ci      struct crocus_sampler_view *tex = shs->textures[i];
2623bf215546Sopenharmony_ci
2624bf215546Sopenharmony_ci      if (!state || !tex) {
2625bf215546Sopenharmony_ci         memset(map, 0, 4 * GENX(SAMPLER_STATE_length));
2626bf215546Sopenharmony_ci      } else {
2627bf215546Sopenharmony_ci         unsigned border_color_offset = 0;
2628bf215546Sopenharmony_ci         if (state->needs_border_color) {
2629bf215546Sopenharmony_ci            crocus_upload_border_color(batch, state, tex, &border_color_offset);
2630bf215546Sopenharmony_ci         }
2631bf215546Sopenharmony_ci
2632bf215546Sopenharmony_ci         enum samp_workaround wa = SAMP_NORMAL;
2633bf215546Sopenharmony_ci         /* There's a bug in 1D texture sampling - it actually pays
2634bf215546Sopenharmony_ci          * attention to the wrap_t value, though it should not.
2635bf215546Sopenharmony_ci          * Override the wrap_t value here to GL_REPEAT to keep
2636bf215546Sopenharmony_ci          * any nonexistent border pixels from floating in.
2637bf215546Sopenharmony_ci          */
2638bf215546Sopenharmony_ci         if (tex->base.target == PIPE_TEXTURE_1D)
2639bf215546Sopenharmony_ci            wa = SAMP_T_WRAP;
2640bf215546Sopenharmony_ci         else if (tex->base.target == PIPE_TEXTURE_CUBE ||
2641bf215546Sopenharmony_ci                  tex->base.target == PIPE_TEXTURE_CUBE_ARRAY) {
2642bf215546Sopenharmony_ci            /* Cube maps must use the same wrap mode for all three coordinate
2643bf215546Sopenharmony_ci             * dimensions.  Prior to Haswell, only CUBE and CLAMP are valid.
2644bf215546Sopenharmony_ci             *
2645bf215546Sopenharmony_ci             * Ivybridge and Baytrail seem to have problems with CUBE mode and
2646bf215546Sopenharmony_ci             * integer formats.  Fall back to CLAMP for now.
2647bf215546Sopenharmony_ci             */
2648bf215546Sopenharmony_ci            if (state->pstate.seamless_cube_map &&
2649bf215546Sopenharmony_ci                !(GFX_VERx10 == 70 && util_format_is_pure_integer(tex->base.format)))
2650bf215546Sopenharmony_ci               wa = SAMP_CUBE_CUBE;
2651bf215546Sopenharmony_ci            else
2652bf215546Sopenharmony_ci               wa = SAMP_CUBE_CLAMP;
2653bf215546Sopenharmony_ci         }
2654bf215546Sopenharmony_ci
2655bf215546Sopenharmony_ci         uint32_t first_level = 0;
2656bf215546Sopenharmony_ci         if (tex->base.target != PIPE_BUFFER)
2657bf215546Sopenharmony_ci            first_level = tex->base.u.tex.first_level;
2658bf215546Sopenharmony_ci
2659bf215546Sopenharmony_ci         crocus_upload_sampler_state(batch, state, border_color_offset, wa, first_level, map);
2660bf215546Sopenharmony_ci      }
2661bf215546Sopenharmony_ci
2662bf215546Sopenharmony_ci      map += GENX(SAMPLER_STATE_length);
2663bf215546Sopenharmony_ci   }
2664bf215546Sopenharmony_ci}
2665bf215546Sopenharmony_ci
2666bf215546Sopenharmony_ci/**
2667bf215546Sopenharmony_ci * The pipe->create_sampler_view() driver hook.
2668bf215546Sopenharmony_ci */
2669bf215546Sopenharmony_cistatic struct pipe_sampler_view *
2670bf215546Sopenharmony_cicrocus_create_sampler_view(struct pipe_context *ctx,
2671bf215546Sopenharmony_ci                           struct pipe_resource *tex,
2672bf215546Sopenharmony_ci                           const struct pipe_sampler_view *tmpl)
2673bf215546Sopenharmony_ci{
2674bf215546Sopenharmony_ci   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
2675bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &screen->devinfo;
2676bf215546Sopenharmony_ci   struct crocus_sampler_view *isv = calloc(1, sizeof(struct crocus_sampler_view));
2677bf215546Sopenharmony_ci
2678bf215546Sopenharmony_ci   if (!isv)
2679bf215546Sopenharmony_ci      return NULL;
2680bf215546Sopenharmony_ci
2681bf215546Sopenharmony_ci   /* initialize base object */
2682bf215546Sopenharmony_ci   isv->base = *tmpl;
2683bf215546Sopenharmony_ci   isv->base.context = ctx;
2684bf215546Sopenharmony_ci   isv->base.texture = NULL;
2685bf215546Sopenharmony_ci   pipe_reference_init(&isv->base.reference, 1);
2686bf215546Sopenharmony_ci   pipe_resource_reference(&isv->base.texture, tex);
2687bf215546Sopenharmony_ci
2688bf215546Sopenharmony_ci   if (util_format_is_depth_or_stencil(tmpl->format)) {
2689bf215546Sopenharmony_ci      struct crocus_resource *zres, *sres;
2690bf215546Sopenharmony_ci      const struct util_format_description *desc =
2691bf215546Sopenharmony_ci         util_format_description(tmpl->format);
2692bf215546Sopenharmony_ci
2693bf215546Sopenharmony_ci      crocus_get_depth_stencil_resources(devinfo, tex, &zres, &sres);
2694bf215546Sopenharmony_ci
2695bf215546Sopenharmony_ci      tex = util_format_has_depth(desc) ? &zres->base.b : &sres->base.b;
2696bf215546Sopenharmony_ci
2697bf215546Sopenharmony_ci      if (tex->format == PIPE_FORMAT_S8_UINT)
2698bf215546Sopenharmony_ci         if (GFX_VER == 7 && sres->shadow)
2699bf215546Sopenharmony_ci            tex = &sres->shadow->base.b;
2700bf215546Sopenharmony_ci   }
2701bf215546Sopenharmony_ci
2702bf215546Sopenharmony_ci   isv->res = (struct crocus_resource *) tex;
2703bf215546Sopenharmony_ci
2704bf215546Sopenharmony_ci   isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT;
2705bf215546Sopenharmony_ci
2706bf215546Sopenharmony_ci   if (isv->base.target == PIPE_TEXTURE_CUBE ||
2707bf215546Sopenharmony_ci       isv->base.target == PIPE_TEXTURE_CUBE_ARRAY)
2708bf215546Sopenharmony_ci      usage |= ISL_SURF_USAGE_CUBE_BIT;
2709bf215546Sopenharmony_ci
2710bf215546Sopenharmony_ci   const struct crocus_format_info fmt =
2711bf215546Sopenharmony_ci      crocus_format_for_usage(devinfo, tmpl->format, usage);
2712bf215546Sopenharmony_ci
2713bf215546Sopenharmony_ci   enum pipe_swizzle vswz[4] = { tmpl->swizzle_r, tmpl->swizzle_g, tmpl->swizzle_b, tmpl->swizzle_a };
2714bf215546Sopenharmony_ci   crocus_combine_swizzle(isv->swizzle, fmt.swizzles, vswz);
2715bf215546Sopenharmony_ci
2716bf215546Sopenharmony_ci   /* hardcode stencil swizzles - hw returns 0G01, we want GGGG */
2717bf215546Sopenharmony_ci   if (GFX_VER < 6 &&
2718bf215546Sopenharmony_ci       (tmpl->format == PIPE_FORMAT_X32_S8X24_UINT ||
2719bf215546Sopenharmony_ci        tmpl->format == PIPE_FORMAT_X24S8_UINT)) {
2720bf215546Sopenharmony_ci      isv->swizzle[0] = tmpl->swizzle_g;
2721bf215546Sopenharmony_ci      isv->swizzle[1] = tmpl->swizzle_g;
2722bf215546Sopenharmony_ci      isv->swizzle[2] = tmpl->swizzle_g;
2723bf215546Sopenharmony_ci      isv->swizzle[3] = tmpl->swizzle_g;
2724bf215546Sopenharmony_ci   }
2725bf215546Sopenharmony_ci
2726bf215546Sopenharmony_ci   isv->clear_color = isv->res->aux.clear_color;
2727bf215546Sopenharmony_ci
2728bf215546Sopenharmony_ci   isv->view = (struct isl_view) {
2729bf215546Sopenharmony_ci      .format = fmt.fmt,
2730bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
2731bf215546Sopenharmony_ci      .swizzle = (struct isl_swizzle) {
2732bf215546Sopenharmony_ci         .r = pipe_to_isl_swizzle(isv->swizzle[0], false),
2733bf215546Sopenharmony_ci         .g = pipe_to_isl_swizzle(isv->swizzle[1], false),
2734bf215546Sopenharmony_ci         .b = pipe_to_isl_swizzle(isv->swizzle[2], false),
2735bf215546Sopenharmony_ci         .a = pipe_to_isl_swizzle(isv->swizzle[3], false),
2736bf215546Sopenharmony_ci      },
2737bf215546Sopenharmony_ci#else
2738bf215546Sopenharmony_ci      /* swizzling handled in shader code */
2739bf215546Sopenharmony_ci      .swizzle = ISL_SWIZZLE_IDENTITY,
2740bf215546Sopenharmony_ci#endif
2741bf215546Sopenharmony_ci      .usage = usage,
2742bf215546Sopenharmony_ci   };
2743bf215546Sopenharmony_ci
2744bf215546Sopenharmony_ci   /* Fill out SURFACE_STATE for this view. */
2745bf215546Sopenharmony_ci   if (tmpl->target != PIPE_BUFFER) {
2746bf215546Sopenharmony_ci      isv->view.base_level = tmpl->u.tex.first_level;
2747bf215546Sopenharmony_ci      isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1;
2748bf215546Sopenharmony_ci
2749bf215546Sopenharmony_ci      /* Hardware older than skylake ignores this value */
2750bf215546Sopenharmony_ci      assert(tex->target != PIPE_TEXTURE_3D || !tmpl->u.tex.first_layer);
2751bf215546Sopenharmony_ci
2752bf215546Sopenharmony_ci      // XXX: do I need to port f9fd0cf4790cb2a530e75d1a2206dbb9d8af7cb2?
2753bf215546Sopenharmony_ci      isv->view.base_array_layer = tmpl->u.tex.first_layer;
2754bf215546Sopenharmony_ci      isv->view.array_len =
2755bf215546Sopenharmony_ci         tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;
2756bf215546Sopenharmony_ci   }
2757bf215546Sopenharmony_ci#if GFX_VER >= 6
2758bf215546Sopenharmony_ci   /* just create a second view struct for texture gather just in case */
2759bf215546Sopenharmony_ci   isv->gather_view = isv->view;
2760bf215546Sopenharmony_ci
2761bf215546Sopenharmony_ci#if GFX_VER == 7
2762bf215546Sopenharmony_ci   if (fmt.fmt == ISL_FORMAT_R32G32_FLOAT ||
2763bf215546Sopenharmony_ci       fmt.fmt == ISL_FORMAT_R32G32_SINT ||
2764bf215546Sopenharmony_ci       fmt.fmt == ISL_FORMAT_R32G32_UINT) {
2765bf215546Sopenharmony_ci      isv->gather_view.format = ISL_FORMAT_R32G32_FLOAT_LD;
2766bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
2767bf215546Sopenharmony_ci      isv->gather_view.swizzle = (struct isl_swizzle) {
2768bf215546Sopenharmony_ci         .r = pipe_to_isl_swizzle(isv->swizzle[0], GFX_VERx10 == 75),
2769bf215546Sopenharmony_ci         .g = pipe_to_isl_swizzle(isv->swizzle[1], GFX_VERx10 == 75),
2770bf215546Sopenharmony_ci         .b = pipe_to_isl_swizzle(isv->swizzle[2], GFX_VERx10 == 75),
2771bf215546Sopenharmony_ci         .a = pipe_to_isl_swizzle(isv->swizzle[3], GFX_VERx10 == 75),
2772bf215546Sopenharmony_ci      };
2773bf215546Sopenharmony_ci#endif
2774bf215546Sopenharmony_ci   }
2775bf215546Sopenharmony_ci#endif
2776bf215546Sopenharmony_ci#if GFX_VER == 6
2777bf215546Sopenharmony_ci   /* Sandybridge's gather4 message is broken for integer formats.
2778bf215546Sopenharmony_ci    * To work around this, we pretend the surface is UNORM for
2779bf215546Sopenharmony_ci    * 8 or 16-bit formats, and emit shader instructions to recover
2780bf215546Sopenharmony_ci    * the real INT/UINT value.  For 32-bit formats, we pretend
2781bf215546Sopenharmony_ci    * the surface is FLOAT, and simply reinterpret the resulting
2782bf215546Sopenharmony_ci    * bits.
2783bf215546Sopenharmony_ci    */
2784bf215546Sopenharmony_ci   switch (fmt.fmt) {
2785bf215546Sopenharmony_ci   case ISL_FORMAT_R8_SINT:
2786bf215546Sopenharmony_ci   case ISL_FORMAT_R8_UINT:
2787bf215546Sopenharmony_ci      isv->gather_view.format = ISL_FORMAT_R8_UNORM;
2788bf215546Sopenharmony_ci      break;
2789bf215546Sopenharmony_ci
2790bf215546Sopenharmony_ci   case ISL_FORMAT_R16_SINT:
2791bf215546Sopenharmony_ci   case ISL_FORMAT_R16_UINT:
2792bf215546Sopenharmony_ci      isv->gather_view.format = ISL_FORMAT_R16_UNORM;
2793bf215546Sopenharmony_ci      break;
2794bf215546Sopenharmony_ci
2795bf215546Sopenharmony_ci   case ISL_FORMAT_R32_SINT:
2796bf215546Sopenharmony_ci   case ISL_FORMAT_R32_UINT:
2797bf215546Sopenharmony_ci      isv->gather_view.format = ISL_FORMAT_R32_FLOAT;
2798bf215546Sopenharmony_ci      break;
2799bf215546Sopenharmony_ci
2800bf215546Sopenharmony_ci   default:
2801bf215546Sopenharmony_ci      break;
2802bf215546Sopenharmony_ci   }
2803bf215546Sopenharmony_ci#endif
2804bf215546Sopenharmony_ci#endif
2805bf215546Sopenharmony_ci   /* Fill out SURFACE_STATE for this view. */
2806bf215546Sopenharmony_ci   if (tmpl->target != PIPE_BUFFER) {
2807bf215546Sopenharmony_ci      if (crocus_resource_unfinished_aux_import(isv->res))
2808bf215546Sopenharmony_ci         crocus_resource_finish_aux_import(&screen->base, isv->res);
2809bf215546Sopenharmony_ci
2810bf215546Sopenharmony_ci   }
2811bf215546Sopenharmony_ci
2812bf215546Sopenharmony_ci   return &isv->base;
2813bf215546Sopenharmony_ci}
2814bf215546Sopenharmony_ci
2815bf215546Sopenharmony_cistatic void
2816bf215546Sopenharmony_cicrocus_sampler_view_destroy(struct pipe_context *ctx,
2817bf215546Sopenharmony_ci                            struct pipe_sampler_view *state)
2818bf215546Sopenharmony_ci{
2819bf215546Sopenharmony_ci   struct crocus_sampler_view *isv = (void *) state;
2820bf215546Sopenharmony_ci   pipe_resource_reference(&state->texture, NULL);
2821bf215546Sopenharmony_ci   free(isv);
2822bf215546Sopenharmony_ci}
2823bf215546Sopenharmony_ci
2824bf215546Sopenharmony_ci/**
2825bf215546Sopenharmony_ci * The pipe->create_surface() driver hook.
2826bf215546Sopenharmony_ci *
2827bf215546Sopenharmony_ci * In Gallium nomenclature, "surfaces" are a view of a resource that
2828bf215546Sopenharmony_ci * can be bound as a render target or depth/stencil buffer.
2829bf215546Sopenharmony_ci */
2830bf215546Sopenharmony_cistatic struct pipe_surface *
2831bf215546Sopenharmony_cicrocus_create_surface(struct pipe_context *ctx,
2832bf215546Sopenharmony_ci                      struct pipe_resource *tex,
2833bf215546Sopenharmony_ci                      const struct pipe_surface *tmpl)
2834bf215546Sopenharmony_ci{
2835bf215546Sopenharmony_ci   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
2836bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &screen->devinfo;
2837bf215546Sopenharmony_ci
2838bf215546Sopenharmony_ci   isl_surf_usage_flags_t usage = 0;
2839bf215546Sopenharmony_ci   if (tmpl->writable)
2840bf215546Sopenharmony_ci      usage = ISL_SURF_USAGE_STORAGE_BIT;
2841bf215546Sopenharmony_ci   else if (util_format_is_depth_or_stencil(tmpl->format))
2842bf215546Sopenharmony_ci      usage = ISL_SURF_USAGE_DEPTH_BIT;
2843bf215546Sopenharmony_ci   else
2844bf215546Sopenharmony_ci      usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
2845bf215546Sopenharmony_ci
2846bf215546Sopenharmony_ci   const struct crocus_format_info fmt =
2847bf215546Sopenharmony_ci      crocus_format_for_usage(devinfo, tmpl->format, usage);
2848bf215546Sopenharmony_ci
2849bf215546Sopenharmony_ci   if ((usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
2850bf215546Sopenharmony_ci       !isl_format_supports_rendering(devinfo, fmt.fmt)) {
2851bf215546Sopenharmony_ci      /* Framebuffer validation will reject this invalid case, but it
2852bf215546Sopenharmony_ci       * hasn't had the opportunity yet.  In the meantime, we need to
2853bf215546Sopenharmony_ci       * avoid hitting ISL asserts about unsupported formats below.
2854bf215546Sopenharmony_ci       */
2855bf215546Sopenharmony_ci      return NULL;
2856bf215546Sopenharmony_ci   }
2857bf215546Sopenharmony_ci
2858bf215546Sopenharmony_ci   struct crocus_surface *surf = calloc(1, sizeof(struct crocus_surface));
2859bf215546Sopenharmony_ci   struct pipe_surface *psurf = &surf->base;
2860bf215546Sopenharmony_ci   struct crocus_resource *res = (struct crocus_resource *) tex;
2861bf215546Sopenharmony_ci
2862bf215546Sopenharmony_ci   if (!surf)
2863bf215546Sopenharmony_ci      return NULL;
2864bf215546Sopenharmony_ci
2865bf215546Sopenharmony_ci   pipe_reference_init(&psurf->reference, 1);
2866bf215546Sopenharmony_ci   pipe_resource_reference(&psurf->texture, tex);
2867bf215546Sopenharmony_ci   psurf->context = ctx;
2868bf215546Sopenharmony_ci   psurf->format = tmpl->format;
2869bf215546Sopenharmony_ci   psurf->width = tex->width0;
2870bf215546Sopenharmony_ci   psurf->height = tex->height0;
2871bf215546Sopenharmony_ci   psurf->texture = tex;
2872bf215546Sopenharmony_ci   psurf->u.tex.first_layer = tmpl->u.tex.first_layer;
2873bf215546Sopenharmony_ci   psurf->u.tex.last_layer = tmpl->u.tex.last_layer;
2874bf215546Sopenharmony_ci   psurf->u.tex.level = tmpl->u.tex.level;
2875bf215546Sopenharmony_ci
2876bf215546Sopenharmony_ci   uint32_t array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;
2877bf215546Sopenharmony_ci
2878bf215546Sopenharmony_ci   struct isl_view *view = &surf->view;
2879bf215546Sopenharmony_ci   *view = (struct isl_view) {
2880bf215546Sopenharmony_ci      .format = fmt.fmt,
2881bf215546Sopenharmony_ci      .base_level = tmpl->u.tex.level,
2882bf215546Sopenharmony_ci      .levels = 1,
2883bf215546Sopenharmony_ci      .base_array_layer = tmpl->u.tex.first_layer,
2884bf215546Sopenharmony_ci      .array_len = array_len,
2885bf215546Sopenharmony_ci      .swizzle = ISL_SWIZZLE_IDENTITY,
2886bf215546Sopenharmony_ci      .usage = usage,
2887bf215546Sopenharmony_ci   };
2888bf215546Sopenharmony_ci
2889bf215546Sopenharmony_ci#if GFX_VER >= 6
2890bf215546Sopenharmony_ci   struct isl_view *read_view = &surf->read_view;
2891bf215546Sopenharmony_ci   *read_view = (struct isl_view) {
2892bf215546Sopenharmony_ci      .format = fmt.fmt,
2893bf215546Sopenharmony_ci      .base_level = tmpl->u.tex.level,
2894bf215546Sopenharmony_ci      .levels = 1,
2895bf215546Sopenharmony_ci      .base_array_layer = tmpl->u.tex.first_layer,
2896bf215546Sopenharmony_ci      .array_len = array_len,
2897bf215546Sopenharmony_ci      .swizzle = ISL_SWIZZLE_IDENTITY,
2898bf215546Sopenharmony_ci      .usage = ISL_SURF_USAGE_TEXTURE_BIT,
2899bf215546Sopenharmony_ci   };
2900bf215546Sopenharmony_ci#endif
2901bf215546Sopenharmony_ci
2902bf215546Sopenharmony_ci   surf->clear_color = res->aux.clear_color;
2903bf215546Sopenharmony_ci
2904bf215546Sopenharmony_ci   /* Bail early for depth/stencil - we don't want SURFACE_STATE for them. */
2905bf215546Sopenharmony_ci   if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT |
2906bf215546Sopenharmony_ci                          ISL_SURF_USAGE_STENCIL_BIT))
2907bf215546Sopenharmony_ci      return psurf;
2908bf215546Sopenharmony_ci
2909bf215546Sopenharmony_ci   if (!isl_format_is_compressed(res->surf.format)) {
2910bf215546Sopenharmony_ci      if (crocus_resource_unfinished_aux_import(res))
2911bf215546Sopenharmony_ci         crocus_resource_finish_aux_import(&screen->base, res);
2912bf215546Sopenharmony_ci
2913bf215546Sopenharmony_ci      memcpy(&surf->surf, &res->surf, sizeof(surf->surf));
2914bf215546Sopenharmony_ci      uint64_t temp_offset;
2915bf215546Sopenharmony_ci      uint32_t temp_x, temp_y;
2916bf215546Sopenharmony_ci
2917bf215546Sopenharmony_ci      isl_surf_get_image_offset_B_tile_sa(&res->surf, tmpl->u.tex.level,
2918bf215546Sopenharmony_ci                                          res->base.b.target == PIPE_TEXTURE_3D ? 0 : tmpl->u.tex.first_layer,
2919bf215546Sopenharmony_ci                                          res->base.b.target == PIPE_TEXTURE_3D ? tmpl->u.tex.first_layer : 0,
2920bf215546Sopenharmony_ci                                          &temp_offset, &temp_x, &temp_y);
2921bf215546Sopenharmony_ci      if (!devinfo->has_surface_tile_offset &&
2922bf215546Sopenharmony_ci          (temp_x || temp_y)) {
2923bf215546Sopenharmony_ci         /* Original gfx4 hardware couldn't draw to a non-tile-aligned
2924bf215546Sopenharmony_ci          * destination.
2925bf215546Sopenharmony_ci          */
2926bf215546Sopenharmony_ci         /* move to temp */
2927bf215546Sopenharmony_ci         struct pipe_resource wa_templ = (struct pipe_resource) {
2928bf215546Sopenharmony_ci            .width0 = u_minify(res->base.b.width0, tmpl->u.tex.level),
2929bf215546Sopenharmony_ci            .height0 = u_minify(res->base.b.height0, tmpl->u.tex.level),
2930bf215546Sopenharmony_ci            .depth0 = 1,
2931bf215546Sopenharmony_ci            .array_size = 1,
2932bf215546Sopenharmony_ci            .format = res->base.b.format,
2933bf215546Sopenharmony_ci            .target = PIPE_TEXTURE_2D,
2934bf215546Sopenharmony_ci            .bind = (usage & ISL_SURF_USAGE_DEPTH_BIT ? PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET) | PIPE_BIND_SAMPLER_VIEW,
2935bf215546Sopenharmony_ci         };
2936bf215546Sopenharmony_ci         surf->align_res = screen->base.resource_create(&screen->base, &wa_templ);
2937bf215546Sopenharmony_ci         view->base_level = 0;
2938bf215546Sopenharmony_ci         view->base_array_layer = 0;
2939bf215546Sopenharmony_ci         view->array_len = 1;
2940bf215546Sopenharmony_ci         struct crocus_resource *align_res = (struct crocus_resource *)surf->align_res;
2941bf215546Sopenharmony_ci         memcpy(&surf->surf, &align_res->surf, sizeof(surf->surf));
2942bf215546Sopenharmony_ci      }
2943bf215546Sopenharmony_ci      return psurf;
2944bf215546Sopenharmony_ci   }
2945bf215546Sopenharmony_ci
2946bf215546Sopenharmony_ci   /* The resource has a compressed format, which is not renderable, but we
2947bf215546Sopenharmony_ci    * have a renderable view format.  We must be attempting to upload blocks
2948bf215546Sopenharmony_ci    * of compressed data via an uncompressed view.
2949bf215546Sopenharmony_ci    *
2950bf215546Sopenharmony_ci    * In this case, we can assume there are no auxiliary buffers, a single
2951bf215546Sopenharmony_ci    * miplevel, and that the resource is single-sampled.  Gallium may try
2952bf215546Sopenharmony_ci    * and create an uncompressed view with multiple layers, however.
2953bf215546Sopenharmony_ci    */
2954bf215546Sopenharmony_ci   assert(!isl_format_is_compressed(fmt.fmt));
2955bf215546Sopenharmony_ci   assert(res->surf.samples == 1);
2956bf215546Sopenharmony_ci   assert(view->levels == 1);
2957bf215546Sopenharmony_ci
2958bf215546Sopenharmony_ci   /* TODO: compressed pbo uploads aren't working here */
2959bf215546Sopenharmony_ci   return NULL;
2960bf215546Sopenharmony_ci
2961bf215546Sopenharmony_ci   uint64_t offset_B = 0;
2962bf215546Sopenharmony_ci   uint32_t tile_x_sa = 0, tile_y_sa = 0;
2963bf215546Sopenharmony_ci
2964bf215546Sopenharmony_ci   if (view->base_level > 0) {
2965bf215546Sopenharmony_ci      /* We can't rely on the hardware's miplevel selection with such
2966bf215546Sopenharmony_ci       * a substantial lie about the format, so we select a single image
2967bf215546Sopenharmony_ci       * using the Tile X/Y Offset fields.  In this case, we can't handle
2968bf215546Sopenharmony_ci       * multiple array slices.
2969bf215546Sopenharmony_ci       *
2970bf215546Sopenharmony_ci       * On Broadwell, HALIGN and VALIGN are specified in pixels and are
2971bf215546Sopenharmony_ci       * hard-coded to align to exactly the block size of the compressed
2972bf215546Sopenharmony_ci       * texture.  This means that, when reinterpreted as a non-compressed
2973bf215546Sopenharmony_ci       * texture, the tile offsets may be anything and we can't rely on
2974bf215546Sopenharmony_ci       * X/Y Offset.
2975bf215546Sopenharmony_ci       *
2976bf215546Sopenharmony_ci       * Return NULL to force the state tracker to take fallback paths.
2977bf215546Sopenharmony_ci       */
2978bf215546Sopenharmony_ci      // TODO: check if the gen7 check is right, originally gen8
2979bf215546Sopenharmony_ci      if (view->array_len > 1 || GFX_VER == 7)
2980bf215546Sopenharmony_ci         return NULL;
2981bf215546Sopenharmony_ci
2982bf215546Sopenharmony_ci      const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D;
2983bf215546Sopenharmony_ci      isl_surf_get_image_surf(&screen->isl_dev, &res->surf,
2984bf215546Sopenharmony_ci                              view->base_level,
2985bf215546Sopenharmony_ci                              is_3d ? 0 : view->base_array_layer,
2986bf215546Sopenharmony_ci                              is_3d ? view->base_array_layer : 0,
2987bf215546Sopenharmony_ci                              &surf->surf,
2988bf215546Sopenharmony_ci                              &offset_B, &tile_x_sa, &tile_y_sa);
2989bf215546Sopenharmony_ci
2990bf215546Sopenharmony_ci      /* We use address and tile offsets to access a single level/layer
2991bf215546Sopenharmony_ci       * as a subimage, so reset level/layer so it doesn't offset again.
2992bf215546Sopenharmony_ci       */
2993bf215546Sopenharmony_ci      view->base_array_layer = 0;
2994bf215546Sopenharmony_ci      view->base_level = 0;
2995bf215546Sopenharmony_ci   } else {
2996bf215546Sopenharmony_ci      /* Level 0 doesn't require tile offsets, and the hardware can find
2997bf215546Sopenharmony_ci       * array slices using QPitch even with the format override, so we
2998bf215546Sopenharmony_ci       * can allow layers in this case.  Copy the original ISL surface.
2999bf215546Sopenharmony_ci       */
3000bf215546Sopenharmony_ci      memcpy(&surf->surf, &res->surf, sizeof(surf->surf));
3001bf215546Sopenharmony_ci   }
3002bf215546Sopenharmony_ci
3003bf215546Sopenharmony_ci   /* Scale down the image dimensions by the block size. */
3004bf215546Sopenharmony_ci   const struct isl_format_layout *fmtl =
3005bf215546Sopenharmony_ci      isl_format_get_layout(res->surf.format);
3006bf215546Sopenharmony_ci   surf->surf.format = fmt.fmt;
3007bf215546Sopenharmony_ci   surf->surf.logical_level0_px = isl_surf_get_logical_level0_el(&surf->surf);
3008bf215546Sopenharmony_ci   surf->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&surf->surf);
3009bf215546Sopenharmony_ci   tile_x_sa /= fmtl->bw;
3010bf215546Sopenharmony_ci   tile_y_sa /= fmtl->bh;
3011bf215546Sopenharmony_ci
3012bf215546Sopenharmony_ci   psurf->width = surf->surf.logical_level0_px.width;
3013bf215546Sopenharmony_ci   psurf->height = surf->surf.logical_level0_px.height;
3014bf215546Sopenharmony_ci
3015bf215546Sopenharmony_ci   return psurf;
3016bf215546Sopenharmony_ci}
3017bf215546Sopenharmony_ci
3018bf215546Sopenharmony_ci#if GFX_VER >= 7
3019bf215546Sopenharmony_cistatic void
3020bf215546Sopenharmony_cifill_default_image_param(struct brw_image_param *param)
3021bf215546Sopenharmony_ci{
3022bf215546Sopenharmony_ci   memset(param, 0, sizeof(*param));
3023bf215546Sopenharmony_ci   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
3024bf215546Sopenharmony_ci    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
3025bf215546Sopenharmony_ci    * detailed explanation of these parameters.
3026bf215546Sopenharmony_ci    */
3027bf215546Sopenharmony_ci   param->swizzling[0] = 0xff;
3028bf215546Sopenharmony_ci   param->swizzling[1] = 0xff;
3029bf215546Sopenharmony_ci}
3030bf215546Sopenharmony_ci
3031bf215546Sopenharmony_cistatic void
3032bf215546Sopenharmony_cifill_buffer_image_param(struct brw_image_param *param,
3033bf215546Sopenharmony_ci                        enum pipe_format pfmt,
3034bf215546Sopenharmony_ci                        unsigned size)
3035bf215546Sopenharmony_ci{
3036bf215546Sopenharmony_ci   const unsigned cpp = util_format_get_blocksize(pfmt);
3037bf215546Sopenharmony_ci
3038bf215546Sopenharmony_ci   fill_default_image_param(param);
3039bf215546Sopenharmony_ci   param->size[0] = size / cpp;
3040bf215546Sopenharmony_ci   param->stride[0] = cpp;
3041bf215546Sopenharmony_ci}
3042bf215546Sopenharmony_ci
3043bf215546Sopenharmony_ci#endif
3044bf215546Sopenharmony_ci
3045bf215546Sopenharmony_ci/**
3046bf215546Sopenharmony_ci * The pipe->set_shader_images() driver hook.
3047bf215546Sopenharmony_ci */
3048bf215546Sopenharmony_cistatic void
3049bf215546Sopenharmony_cicrocus_set_shader_images(struct pipe_context *ctx,
3050bf215546Sopenharmony_ci                         enum pipe_shader_type p_stage,
3051bf215546Sopenharmony_ci                         unsigned start_slot, unsigned count,
3052bf215546Sopenharmony_ci                         unsigned unbind_num_trailing_slots,
3053bf215546Sopenharmony_ci                         const struct pipe_image_view *p_images)
3054bf215546Sopenharmony_ci{
3055bf215546Sopenharmony_ci#if GFX_VER >= 7
3056bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3057bf215546Sopenharmony_ci   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
3058bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &screen->devinfo;
3059bf215546Sopenharmony_ci   gl_shader_stage stage = stage_from_pipe(p_stage);
3060bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
3061bf215546Sopenharmony_ci   struct crocus_genx_state *genx = ice->state.genx;
3062bf215546Sopenharmony_ci   struct brw_image_param *image_params = genx->shaders[stage].image_param;
3063bf215546Sopenharmony_ci
3064bf215546Sopenharmony_ci   shs->bound_image_views &= ~u_bit_consecutive(start_slot, count);
3065bf215546Sopenharmony_ci
3066bf215546Sopenharmony_ci   for (unsigned i = 0; i < count; i++) {
3067bf215546Sopenharmony_ci      struct crocus_image_view *iv = &shs->image[start_slot + i];
3068bf215546Sopenharmony_ci
3069bf215546Sopenharmony_ci      if (p_images && p_images[i].resource) {
3070bf215546Sopenharmony_ci         const struct pipe_image_view *img = &p_images[i];
3071bf215546Sopenharmony_ci         struct crocus_resource *res = (void *) img->resource;
3072bf215546Sopenharmony_ci
3073bf215546Sopenharmony_ci         util_copy_image_view(&iv->base, img);
3074bf215546Sopenharmony_ci
3075bf215546Sopenharmony_ci         shs->bound_image_views |= 1 << (start_slot + i);
3076bf215546Sopenharmony_ci
3077bf215546Sopenharmony_ci         res->bind_history |= PIPE_BIND_SHADER_IMAGE;
3078bf215546Sopenharmony_ci         res->bind_stages |= 1 << stage;
3079bf215546Sopenharmony_ci
3080bf215546Sopenharmony_ci         isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT;
3081bf215546Sopenharmony_ci         struct crocus_format_info fmt =
3082bf215546Sopenharmony_ci            crocus_format_for_usage(devinfo, img->format, usage);
3083bf215546Sopenharmony_ci
3084bf215546Sopenharmony_ci         struct isl_swizzle swiz = pipe_to_isl_swizzles(fmt.swizzles);
3085bf215546Sopenharmony_ci         if (img->shader_access & PIPE_IMAGE_ACCESS_READ) {
3086bf215546Sopenharmony_ci            /* On Gen8, try to use typed surfaces reads (which support a
3087bf215546Sopenharmony_ci             * limited number of formats), and if not possible, fall back
3088bf215546Sopenharmony_ci             * to untyped reads.
3089bf215546Sopenharmony_ci             */
3090bf215546Sopenharmony_ci            if (!isl_has_matching_typed_storage_image_format(devinfo, fmt.fmt))
3091bf215546Sopenharmony_ci               fmt.fmt = ISL_FORMAT_RAW;
3092bf215546Sopenharmony_ci            else
3093bf215546Sopenharmony_ci               fmt.fmt = isl_lower_storage_image_format(devinfo, fmt.fmt);
3094bf215546Sopenharmony_ci         }
3095bf215546Sopenharmony_ci
3096bf215546Sopenharmony_ci         if (res->base.b.target != PIPE_BUFFER) {
3097bf215546Sopenharmony_ci            struct isl_view view = {
3098bf215546Sopenharmony_ci               .format = fmt.fmt,
3099bf215546Sopenharmony_ci               .base_level = img->u.tex.level,
3100bf215546Sopenharmony_ci               .levels = 1,
3101bf215546Sopenharmony_ci               .base_array_layer = img->u.tex.first_layer,
3102bf215546Sopenharmony_ci               .array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1,
3103bf215546Sopenharmony_ci               .swizzle = swiz,
3104bf215546Sopenharmony_ci               .usage = usage,
3105bf215546Sopenharmony_ci            };
3106bf215546Sopenharmony_ci
3107bf215546Sopenharmony_ci            iv->view = view;
3108bf215546Sopenharmony_ci
3109bf215546Sopenharmony_ci            isl_surf_fill_image_param(&screen->isl_dev,
3110bf215546Sopenharmony_ci                                      &image_params[start_slot + i],
3111bf215546Sopenharmony_ci                                      &res->surf, &view);
3112bf215546Sopenharmony_ci         } else {
3113bf215546Sopenharmony_ci            struct isl_view view = {
3114bf215546Sopenharmony_ci               .format = fmt.fmt,
3115bf215546Sopenharmony_ci               .swizzle = swiz,
3116bf215546Sopenharmony_ci               .usage = usage,
3117bf215546Sopenharmony_ci            };
3118bf215546Sopenharmony_ci            iv->view = view;
3119bf215546Sopenharmony_ci
3120bf215546Sopenharmony_ci            util_range_add(&res->base.b, &res->valid_buffer_range, img->u.buf.offset,
3121bf215546Sopenharmony_ci                           img->u.buf.offset + img->u.buf.size);
3122bf215546Sopenharmony_ci            fill_buffer_image_param(&image_params[start_slot + i],
3123bf215546Sopenharmony_ci                                    img->format, img->u.buf.size);
3124bf215546Sopenharmony_ci         }
3125bf215546Sopenharmony_ci      } else {
3126bf215546Sopenharmony_ci         pipe_resource_reference(&iv->base.resource, NULL);
3127bf215546Sopenharmony_ci         fill_default_image_param(&image_params[start_slot + i]);
3128bf215546Sopenharmony_ci      }
3129bf215546Sopenharmony_ci   }
3130bf215546Sopenharmony_ci
3131bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;
3132bf215546Sopenharmony_ci   ice->state.dirty |=
3133bf215546Sopenharmony_ci      stage == MESA_SHADER_COMPUTE ? CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES
3134bf215546Sopenharmony_ci                                   : CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
3135bf215546Sopenharmony_ci
3136bf215546Sopenharmony_ci   /* Broadwell also needs brw_image_params re-uploaded */
3137bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage;
3138bf215546Sopenharmony_ci   shs->sysvals_need_upload = true;
3139bf215546Sopenharmony_ci#endif
3140bf215546Sopenharmony_ci}
3141bf215546Sopenharmony_ci
3142bf215546Sopenharmony_ci
3143bf215546Sopenharmony_ci/**
3144bf215546Sopenharmony_ci * The pipe->set_sampler_views() driver hook.
3145bf215546Sopenharmony_ci */
3146bf215546Sopenharmony_cistatic void
3147bf215546Sopenharmony_cicrocus_set_sampler_views(struct pipe_context *ctx,
3148bf215546Sopenharmony_ci                         enum pipe_shader_type p_stage,
3149bf215546Sopenharmony_ci                         unsigned start, unsigned count,
3150bf215546Sopenharmony_ci                         unsigned unbind_num_trailing_slots,
3151bf215546Sopenharmony_ci                         bool take_ownership,
3152bf215546Sopenharmony_ci                         struct pipe_sampler_view **views)
3153bf215546Sopenharmony_ci{
3154bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3155bf215546Sopenharmony_ci   gl_shader_stage stage = stage_from_pipe(p_stage);
3156bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
3157bf215546Sopenharmony_ci
3158bf215546Sopenharmony_ci   shs->bound_sampler_views &= ~u_bit_consecutive(start, count);
3159bf215546Sopenharmony_ci
3160bf215546Sopenharmony_ci   for (unsigned i = 0; i < count; i++) {
3161bf215546Sopenharmony_ci      struct pipe_sampler_view *pview = views ? views[i] : NULL;
3162bf215546Sopenharmony_ci
3163bf215546Sopenharmony_ci      if (take_ownership) {
3164bf215546Sopenharmony_ci         pipe_sampler_view_reference((struct pipe_sampler_view **)
3165bf215546Sopenharmony_ci                                     &shs->textures[start + i], NULL);
3166bf215546Sopenharmony_ci         shs->textures[start + i] = (struct crocus_sampler_view *)pview;
3167bf215546Sopenharmony_ci      } else {
3168bf215546Sopenharmony_ci         pipe_sampler_view_reference((struct pipe_sampler_view **)
3169bf215546Sopenharmony_ci                                     &shs->textures[start + i], pview);
3170bf215546Sopenharmony_ci      }
3171bf215546Sopenharmony_ci
3172bf215546Sopenharmony_ci      struct crocus_sampler_view *view = (void *) pview;
3173bf215546Sopenharmony_ci      if (view) {
3174bf215546Sopenharmony_ci         view->res->bind_history |= PIPE_BIND_SAMPLER_VIEW;
3175bf215546Sopenharmony_ci         view->res->bind_stages |= 1 << stage;
3176bf215546Sopenharmony_ci
3177bf215546Sopenharmony_ci         shs->bound_sampler_views |= 1 << (start + i);
3178bf215546Sopenharmony_ci      }
3179bf215546Sopenharmony_ci   }
3180bf215546Sopenharmony_ci#if GFX_VER == 6
3181bf215546Sopenharmony_ci   /* first level parameters to crocus_upload_sampler_state is gfx6 only */
3182bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3183bf215546Sopenharmony_ci#endif
3184bf215546Sopenharmony_ci   ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage);
3185bf215546Sopenharmony_ci   ice->state.dirty |=
3186bf215546Sopenharmony_ci      stage == MESA_SHADER_COMPUTE ? CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES
3187bf215546Sopenharmony_ci                                   : CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
3188bf215546Sopenharmony_ci   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES];
3189bf215546Sopenharmony_ci}
3190bf215546Sopenharmony_ci
3191bf215546Sopenharmony_ci/**
3192bf215546Sopenharmony_ci * The pipe->set_tess_state() driver hook.
3193bf215546Sopenharmony_ci */
3194bf215546Sopenharmony_cistatic void
3195bf215546Sopenharmony_cicrocus_set_tess_state(struct pipe_context *ctx,
3196bf215546Sopenharmony_ci                      const float default_outer_level[4],
3197bf215546Sopenharmony_ci                      const float default_inner_level[2])
3198bf215546Sopenharmony_ci{
3199bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3200bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
3201bf215546Sopenharmony_ci
3202bf215546Sopenharmony_ci   memcpy(&ice->state.default_outer_level[0], &default_outer_level[0], 4 * sizeof(float));
3203bf215546Sopenharmony_ci   memcpy(&ice->state.default_inner_level[0], &default_inner_level[0], 2 * sizeof(float));
3204bf215546Sopenharmony_ci
3205bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
3206bf215546Sopenharmony_ci   shs->sysvals_need_upload = true;
3207bf215546Sopenharmony_ci}
3208bf215546Sopenharmony_ci
3209bf215546Sopenharmony_cistatic void
3210bf215546Sopenharmony_cicrocus_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices)
3211bf215546Sopenharmony_ci{
3212bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3213bf215546Sopenharmony_ci
3214bf215546Sopenharmony_ci   ice->state.patch_vertices = patch_vertices;
3215bf215546Sopenharmony_ci}
3216bf215546Sopenharmony_ci
3217bf215546Sopenharmony_cistatic void
3218bf215546Sopenharmony_cicrocus_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
3219bf215546Sopenharmony_ci{
3220bf215546Sopenharmony_ci   struct crocus_surface *surf = (void *) p_surf;
3221bf215546Sopenharmony_ci   pipe_resource_reference(&p_surf->texture, NULL);
3222bf215546Sopenharmony_ci
3223bf215546Sopenharmony_ci   pipe_resource_reference(&surf->align_res, NULL);
3224bf215546Sopenharmony_ci   free(surf);
3225bf215546Sopenharmony_ci}
3226bf215546Sopenharmony_ci
3227bf215546Sopenharmony_cistatic void
3228bf215546Sopenharmony_cicrocus_set_clip_state(struct pipe_context *ctx,
3229bf215546Sopenharmony_ci                      const struct pipe_clip_state *state)
3230bf215546Sopenharmony_ci{
3231bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3232bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
3233bf215546Sopenharmony_ci   struct crocus_shader_state *gshs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
3234bf215546Sopenharmony_ci   struct crocus_shader_state *tshs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
3235bf215546Sopenharmony_ci
3236bf215546Sopenharmony_ci   memcpy(&ice->state.clip_planes, state, sizeof(*state));
3237bf215546Sopenharmony_ci
3238bf215546Sopenharmony_ci#if GFX_VER <= 5
3239bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;
3240bf215546Sopenharmony_ci#endif
3241bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS | CROCUS_STAGE_DIRTY_CONSTANTS_GS |
3242bf215546Sopenharmony_ci                             CROCUS_STAGE_DIRTY_CONSTANTS_TES;
3243bf215546Sopenharmony_ci   shs->sysvals_need_upload = true;
3244bf215546Sopenharmony_ci   gshs->sysvals_need_upload = true;
3245bf215546Sopenharmony_ci   tshs->sysvals_need_upload = true;
3246bf215546Sopenharmony_ci}
3247bf215546Sopenharmony_ci
3248bf215546Sopenharmony_ci/**
3249bf215546Sopenharmony_ci * The pipe->set_polygon_stipple() driver hook.
3250bf215546Sopenharmony_ci */
3251bf215546Sopenharmony_cistatic void
3252bf215546Sopenharmony_cicrocus_set_polygon_stipple(struct pipe_context *ctx,
3253bf215546Sopenharmony_ci                           const struct pipe_poly_stipple *state)
3254bf215546Sopenharmony_ci{
3255bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3256bf215546Sopenharmony_ci   memcpy(&ice->state.poly_stipple, state, sizeof(*state));
3257bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_POLYGON_STIPPLE;
3258bf215546Sopenharmony_ci}
3259bf215546Sopenharmony_ci
3260bf215546Sopenharmony_ci/**
3261bf215546Sopenharmony_ci * The pipe->set_sample_mask() driver hook.
3262bf215546Sopenharmony_ci */
3263bf215546Sopenharmony_cistatic void
3264bf215546Sopenharmony_cicrocus_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
3265bf215546Sopenharmony_ci{
3266bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3267bf215546Sopenharmony_ci
3268bf215546Sopenharmony_ci   /* We only support 16x MSAA, so we have 16 bits of sample maks.
3269bf215546Sopenharmony_ci    * st/mesa may pass us 0xffffffff though, meaning "enable all samples".
3270bf215546Sopenharmony_ci    */
3271bf215546Sopenharmony_ci   ice->state.sample_mask = sample_mask & 0xff;
3272bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK;
3273bf215546Sopenharmony_ci}
3274bf215546Sopenharmony_ci
3275bf215546Sopenharmony_cistatic void
3276bf215546Sopenharmony_cicrocus_fill_scissor_rect(struct crocus_context *ice,
3277bf215546Sopenharmony_ci                         int idx,
3278bf215546Sopenharmony_ci                         struct pipe_scissor_state *ss)
3279bf215546Sopenharmony_ci{
3280bf215546Sopenharmony_ci   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
3281bf215546Sopenharmony_ci   struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;
3282bf215546Sopenharmony_ci   const struct pipe_viewport_state *vp = &ice->state.viewports[idx];
3283bf215546Sopenharmony_ci   struct pipe_scissor_state scissor = (struct pipe_scissor_state) {
3284bf215546Sopenharmony_ci      .minx = MAX2(-fabsf(vp->scale[0]) + vp->translate[0], 0),
3285bf215546Sopenharmony_ci      .maxx = MIN2( fabsf(vp->scale[0]) + vp->translate[0], cso_fb->width) - 1,
3286bf215546Sopenharmony_ci      .miny = MAX2(-fabsf(vp->scale[1]) + vp->translate[1], 0),
3287bf215546Sopenharmony_ci      .maxy = MIN2( fabsf(vp->scale[1]) + vp->translate[1], cso_fb->height) - 1,
3288bf215546Sopenharmony_ci   };
3289bf215546Sopenharmony_ci   if (cso_state->scissor) {
3290bf215546Sopenharmony_ci      struct pipe_scissor_state *s = &ice->state.scissors[idx];
3291bf215546Sopenharmony_ci      scissor.minx = MAX2(scissor.minx, s->minx);
3292bf215546Sopenharmony_ci      scissor.miny = MAX2(scissor.miny, s->miny);
3293bf215546Sopenharmony_ci      scissor.maxx = MIN2(scissor.maxx, s->maxx);
3294bf215546Sopenharmony_ci      scissor.maxy = MIN2(scissor.maxy, s->maxy);
3295bf215546Sopenharmony_ci   }
3296bf215546Sopenharmony_ci   *ss = scissor;
3297bf215546Sopenharmony_ci}
3298bf215546Sopenharmony_ci
3299bf215546Sopenharmony_ci/**
3300bf215546Sopenharmony_ci * The pipe->set_scissor_states() driver hook.
3301bf215546Sopenharmony_ci *
3302bf215546Sopenharmony_ci * This corresponds to our SCISSOR_RECT state structures.  It's an
3303bf215546Sopenharmony_ci * exact match, so we just store them, and memcpy them out later.
3304bf215546Sopenharmony_ci */
3305bf215546Sopenharmony_cistatic void
3306bf215546Sopenharmony_cicrocus_set_scissor_states(struct pipe_context *ctx,
3307bf215546Sopenharmony_ci                          unsigned start_slot,
3308bf215546Sopenharmony_ci                          unsigned num_scissors,
3309bf215546Sopenharmony_ci                          const struct pipe_scissor_state *rects)
3310bf215546Sopenharmony_ci{
3311bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3312bf215546Sopenharmony_ci
3313bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_scissors; i++) {
3314bf215546Sopenharmony_ci      if (rects[i].minx == rects[i].maxx || rects[i].miny == rects[i].maxy) {
3315bf215546Sopenharmony_ci         /* If the scissor was out of bounds and got clamped to 0 width/height
3316bf215546Sopenharmony_ci          * at the bounds, the subtraction of 1 from maximums could produce a
3317bf215546Sopenharmony_ci          * negative number and thus not clip anything.  Instead, just provide
3318bf215546Sopenharmony_ci          * a min > max scissor inside the bounds, which produces the expected
3319bf215546Sopenharmony_ci          * no rendering.
3320bf215546Sopenharmony_ci          */
3321bf215546Sopenharmony_ci         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
3322bf215546Sopenharmony_ci            .minx = 1, .maxx = 0, .miny = 1, .maxy = 0,
3323bf215546Sopenharmony_ci         };
3324bf215546Sopenharmony_ci      } else {
3325bf215546Sopenharmony_ci         ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {
3326bf215546Sopenharmony_ci            .minx = rects[i].minx,     .miny = rects[i].miny,
3327bf215546Sopenharmony_ci            .maxx = rects[i].maxx - 1, .maxy = rects[i].maxy - 1,
3328bf215546Sopenharmony_ci         };
3329bf215546Sopenharmony_ci      }
3330bf215546Sopenharmony_ci   }
3331bf215546Sopenharmony_ci
3332bf215546Sopenharmony_ci#if GFX_VER < 6
3333bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_RASTER; /* SF state */
3334bf215546Sopenharmony_ci#else
3335bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
3336bf215546Sopenharmony_ci#endif
3337bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;
3338bf215546Sopenharmony_ci
3339bf215546Sopenharmony_ci}
3340bf215546Sopenharmony_ci
3341bf215546Sopenharmony_ci/**
3342bf215546Sopenharmony_ci * The pipe->set_stencil_ref() driver hook.
3343bf215546Sopenharmony_ci *
3344bf215546Sopenharmony_ci * This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time.
3345bf215546Sopenharmony_ci */
3346bf215546Sopenharmony_cistatic void
3347bf215546Sopenharmony_cicrocus_set_stencil_ref(struct pipe_context *ctx,
3348bf215546Sopenharmony_ci                       const struct pipe_stencil_ref ref)
3349bf215546Sopenharmony_ci{
3350bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3351bf215546Sopenharmony_ci   ice->state.stencil_ref = ref;
3352bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
3353bf215546Sopenharmony_ci}
3354bf215546Sopenharmony_ci
#if GFX_VER == 8
/**
 * Returns one edge of the viewport along the given axis: the axis
 * translation offset by the scale magnitude, which takes the sign of
 * the sign argument (negative for the low edge, positive for the high).
 */
static float
viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
{
   const float signed_half_extent = copysignf(state->scale[axis], sign);

   return signed_half_extent + state->translate[axis];
}
#endif
3362bf215546Sopenharmony_ci
3363bf215546Sopenharmony_ci/**
3364bf215546Sopenharmony_ci * The pipe->set_viewport_states() driver hook.
3365bf215546Sopenharmony_ci *
3366bf215546Sopenharmony_ci * This corresponds to our SF_CLIP_VIEWPORT states.  We can't calculate
3367bf215546Sopenharmony_ci * the guardband yet, as we need the framebuffer dimensions, but we can
3368bf215546Sopenharmony_ci * at least fill out the rest.
3369bf215546Sopenharmony_ci */
3370bf215546Sopenharmony_cistatic void
3371bf215546Sopenharmony_cicrocus_set_viewport_states(struct pipe_context *ctx,
3372bf215546Sopenharmony_ci                           unsigned start_slot,
3373bf215546Sopenharmony_ci                           unsigned count,
3374bf215546Sopenharmony_ci                           const struct pipe_viewport_state *states)
3375bf215546Sopenharmony_ci{
3376bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3377bf215546Sopenharmony_ci
3378bf215546Sopenharmony_ci   memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count);
3379bf215546Sopenharmony_ci
3380bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;
3381bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_RASTER;
3382bf215546Sopenharmony_ci#if GFX_VER >= 6
3383bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
3384bf215546Sopenharmony_ci#endif
3385bf215546Sopenharmony_ci
3386bf215546Sopenharmony_ci   if (ice->state.cso_rast && (!ice->state.cso_rast->cso.depth_clip_near ||
3387bf215546Sopenharmony_ci                               !ice->state.cso_rast->cso.depth_clip_far))
3388bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;
3389bf215546Sopenharmony_ci}
3390bf215546Sopenharmony_ci
3391bf215546Sopenharmony_ci/**
3392bf215546Sopenharmony_ci * The pipe->set_framebuffer_state() driver hook.
3393bf215546Sopenharmony_ci *
3394bf215546Sopenharmony_ci * Sets the current draw FBO, including color render targets, depth,
3395bf215546Sopenharmony_ci * and stencil buffers.
3396bf215546Sopenharmony_ci */
3397bf215546Sopenharmony_cistatic void
3398bf215546Sopenharmony_cicrocus_set_framebuffer_state(struct pipe_context *ctx,
3399bf215546Sopenharmony_ci                             const struct pipe_framebuffer_state *state)
3400bf215546Sopenharmony_ci{
3401bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3402bf215546Sopenharmony_ci   struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
3403bf215546Sopenharmony_ci   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
3404bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &screen->devinfo;
3405bf215546Sopenharmony_ci#if 0
3406bf215546Sopenharmony_ci   struct isl_device *isl_dev = &screen->isl_dev;
3407bf215546Sopenharmony_ci   struct crocus_resource *zres;
3408bf215546Sopenharmony_ci   struct crocus_resource *stencil_res;
3409bf215546Sopenharmony_ci#endif
3410bf215546Sopenharmony_ci
3411bf215546Sopenharmony_ci   unsigned samples = util_framebuffer_get_num_samples(state);
3412bf215546Sopenharmony_ci   unsigned layers = util_framebuffer_get_num_layers(state);
3413bf215546Sopenharmony_ci
3414bf215546Sopenharmony_ci#if GFX_VER >= 6
3415bf215546Sopenharmony_ci   if (cso->samples != samples) {
3416bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE;
3417bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK;
3418bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_RASTER;
3419bf215546Sopenharmony_ci#if GFX_VERx10 == 75
3420bf215546Sopenharmony_ci      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS;
3421bf215546Sopenharmony_ci#endif
3422bf215546Sopenharmony_ci   }
3423bf215546Sopenharmony_ci#endif
3424bf215546Sopenharmony_ci
3425bf215546Sopenharmony_ci#if GFX_VER >= 6 && GFX_VER < 8
3426bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
3427bf215546Sopenharmony_ci#endif
3428bf215546Sopenharmony_ci
3429bf215546Sopenharmony_ci   if ((cso->layers == 0) != (layers == 0)) {
3430bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_CLIP;
3431bf215546Sopenharmony_ci   }
3432bf215546Sopenharmony_ci
3433bf215546Sopenharmony_ci   if (cso->width != state->width || cso->height != state->height) {
3434bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;
3435bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_RASTER;
3436bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_DRAWING_RECTANGLE;
3437bf215546Sopenharmony_ci#if GFX_VER >= 6
3438bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
3439bf215546Sopenharmony_ci#endif
3440bf215546Sopenharmony_ci   }
3441bf215546Sopenharmony_ci
3442bf215546Sopenharmony_ci   if (cso->zsbuf || state->zsbuf) {
3443bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER;
3444bf215546Sopenharmony_ci
3445bf215546Sopenharmony_ci      /* update SF's depth buffer format */
3446bf215546Sopenharmony_ci      if (GFX_VER == 7 && cso->zsbuf)
3447bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_RASTER;
3448bf215546Sopenharmony_ci   }
3449bf215546Sopenharmony_ci
3450bf215546Sopenharmony_ci   /* wm thread dispatch enable */
3451bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_WM;
3452bf215546Sopenharmony_ci   util_copy_framebuffer_state(cso, state);
3453bf215546Sopenharmony_ci   cso->samples = samples;
3454bf215546Sopenharmony_ci   cso->layers = layers;
3455bf215546Sopenharmony_ci
3456bf215546Sopenharmony_ci   if (cso->zsbuf) {
3457bf215546Sopenharmony_ci      struct crocus_resource *zres;
3458bf215546Sopenharmony_ci      struct crocus_resource *stencil_res;
3459bf215546Sopenharmony_ci      enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
3460bf215546Sopenharmony_ci      crocus_get_depth_stencil_resources(devinfo, cso->zsbuf->texture, &zres,
3461bf215546Sopenharmony_ci                                         &stencil_res);
3462bf215546Sopenharmony_ci      if (zres && crocus_resource_level_has_hiz(zres, cso->zsbuf->u.tex.level)) {
3463bf215546Sopenharmony_ci         aux_usage = zres->aux.usage;
3464bf215546Sopenharmony_ci      }
3465bf215546Sopenharmony_ci      ice->state.hiz_usage = aux_usage;
3466bf215546Sopenharmony_ci   }
3467bf215546Sopenharmony_ci
3468bf215546Sopenharmony_ci   /* Render target change */
3469bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS;
3470bf215546Sopenharmony_ci
3471bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
3472bf215546Sopenharmony_ci
3473bf215546Sopenharmony_ci   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_FRAMEBUFFER];
3474bf215546Sopenharmony_ci}
3475bf215546Sopenharmony_ci
3476bf215546Sopenharmony_ci/**
3477bf215546Sopenharmony_ci * The pipe->set_constant_buffer() driver hook.
3478bf215546Sopenharmony_ci *
3479bf215546Sopenharmony_ci * This uploads any constant data in user buffers, and references
3480bf215546Sopenharmony_ci * any UBO resources containing constant data.
3481bf215546Sopenharmony_ci */
3482bf215546Sopenharmony_cistatic void
3483bf215546Sopenharmony_cicrocus_set_constant_buffer(struct pipe_context *ctx,
3484bf215546Sopenharmony_ci                           enum pipe_shader_type p_stage, unsigned index,
3485bf215546Sopenharmony_ci                           bool take_ownership,
3486bf215546Sopenharmony_ci                           const struct pipe_constant_buffer *input)
3487bf215546Sopenharmony_ci{
3488bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3489bf215546Sopenharmony_ci   gl_shader_stage stage = stage_from_pipe(p_stage);
3490bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
3491bf215546Sopenharmony_ci   struct pipe_constant_buffer *cbuf = &shs->constbufs[index];
3492bf215546Sopenharmony_ci
3493bf215546Sopenharmony_ci   util_copy_constant_buffer(&shs->constbufs[index], input, take_ownership);
3494bf215546Sopenharmony_ci
3495bf215546Sopenharmony_ci   if (input && input->buffer_size && (input->buffer || input->user_buffer)) {
3496bf215546Sopenharmony_ci      shs->bound_cbufs |= 1u << index;
3497bf215546Sopenharmony_ci
3498bf215546Sopenharmony_ci      if (input->user_buffer) {
3499bf215546Sopenharmony_ci         void *map = NULL;
3500bf215546Sopenharmony_ci         pipe_resource_reference(&cbuf->buffer, NULL);
3501bf215546Sopenharmony_ci         u_upload_alloc(ice->ctx.const_uploader, 0, input->buffer_size, 64,
3502bf215546Sopenharmony_ci                        &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
3503bf215546Sopenharmony_ci
3504bf215546Sopenharmony_ci         if (!cbuf->buffer) {
3505bf215546Sopenharmony_ci            /* Allocation was unsuccessful - just unbind */
3506bf215546Sopenharmony_ci            crocus_set_constant_buffer(ctx, p_stage, index, false, NULL);
3507bf215546Sopenharmony_ci            return;
3508bf215546Sopenharmony_ci         }
3509bf215546Sopenharmony_ci
3510bf215546Sopenharmony_ci         assert(map);
3511bf215546Sopenharmony_ci         memcpy(map, input->user_buffer, input->buffer_size);
3512bf215546Sopenharmony_ci      }
3513bf215546Sopenharmony_ci      cbuf->buffer_size =
3514bf215546Sopenharmony_ci         MIN2(input->buffer_size,
3515bf215546Sopenharmony_ci              crocus_resource_bo(cbuf->buffer)->size - cbuf->buffer_offset);
3516bf215546Sopenharmony_ci
3517bf215546Sopenharmony_ci      struct crocus_resource *res = (void *) cbuf->buffer;
3518bf215546Sopenharmony_ci      res->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
3519bf215546Sopenharmony_ci      res->bind_stages |= 1 << stage;
3520bf215546Sopenharmony_ci   } else {
3521bf215546Sopenharmony_ci      shs->bound_cbufs &= ~(1u << index);
3522bf215546Sopenharmony_ci   }
3523bf215546Sopenharmony_ci
3524bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage;
3525bf215546Sopenharmony_ci}
3526bf215546Sopenharmony_ci
3527bf215546Sopenharmony_cistatic void
3528bf215546Sopenharmony_ciupload_sysvals(struct crocus_context *ice,
3529bf215546Sopenharmony_ci               gl_shader_stage stage)
3530bf215546Sopenharmony_ci{
3531bf215546Sopenharmony_ci   UNUSED struct crocus_genx_state *genx = ice->state.genx;
3532bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
3533bf215546Sopenharmony_ci
3534bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
3535bf215546Sopenharmony_ci   if (!shader || shader->num_system_values == 0)
3536bf215546Sopenharmony_ci      return;
3537bf215546Sopenharmony_ci
3538bf215546Sopenharmony_ci   assert(shader->num_cbufs > 0);
3539bf215546Sopenharmony_ci
3540bf215546Sopenharmony_ci   unsigned sysval_cbuf_index = shader->num_cbufs - 1;
3541bf215546Sopenharmony_ci   struct pipe_constant_buffer *cbuf = &shs->constbufs[sysval_cbuf_index];
3542bf215546Sopenharmony_ci   unsigned upload_size = shader->num_system_values * sizeof(uint32_t);
3543bf215546Sopenharmony_ci   uint32_t *map = NULL;
3544bf215546Sopenharmony_ci
3545bf215546Sopenharmony_ci   assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
3546bf215546Sopenharmony_ci   u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
3547bf215546Sopenharmony_ci                  &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
3548bf215546Sopenharmony_ci
3549bf215546Sopenharmony_ci   for (int i = 0; i < shader->num_system_values; i++) {
3550bf215546Sopenharmony_ci      uint32_t sysval = shader->system_values[i];
3551bf215546Sopenharmony_ci      uint32_t value = 0;
3552bf215546Sopenharmony_ci
3553bf215546Sopenharmony_ci      if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) {
3554bf215546Sopenharmony_ci#if GFX_VER >= 7
3555bf215546Sopenharmony_ci         unsigned img = BRW_PARAM_IMAGE_IDX(sysval);
3556bf215546Sopenharmony_ci         unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval);
3557bf215546Sopenharmony_ci         struct brw_image_param *param =
3558bf215546Sopenharmony_ci            &genx->shaders[stage].image_param[img];
3559bf215546Sopenharmony_ci
3560bf215546Sopenharmony_ci         assert(offset < sizeof(struct brw_image_param));
3561bf215546Sopenharmony_ci         value = ((uint32_t *) param)[offset];
3562bf215546Sopenharmony_ci#endif
3563bf215546Sopenharmony_ci      } else if (sysval == BRW_PARAM_BUILTIN_ZERO) {
3564bf215546Sopenharmony_ci         value = 0;
3565bf215546Sopenharmony_ci      } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) {
3566bf215546Sopenharmony_ci         int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval);
3567bf215546Sopenharmony_ci         int comp  = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval);
3568bf215546Sopenharmony_ci         value = fui(ice->state.clip_planes.ucp[plane][comp]);
3569bf215546Sopenharmony_ci      } else if (sysval == BRW_PARAM_BUILTIN_PATCH_VERTICES_IN) {
3570bf215546Sopenharmony_ci         if (stage == MESA_SHADER_TESS_CTRL) {
3571bf215546Sopenharmony_ci            value = ice->state.vertices_per_patch;
3572bf215546Sopenharmony_ci         } else {
3573bf215546Sopenharmony_ci            assert(stage == MESA_SHADER_TESS_EVAL);
3574bf215546Sopenharmony_ci            const struct shader_info *tcs_info =
3575bf215546Sopenharmony_ci               crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
3576bf215546Sopenharmony_ci            if (tcs_info)
3577bf215546Sopenharmony_ci               value = tcs_info->tess.tcs_vertices_out;
3578bf215546Sopenharmony_ci            else
3579bf215546Sopenharmony_ci               value = ice->state.vertices_per_patch;
3580bf215546Sopenharmony_ci         }
3581bf215546Sopenharmony_ci      } else if (sysval >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&
3582bf215546Sopenharmony_ci                 sysval <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {
3583bf215546Sopenharmony_ci         unsigned i = sysval - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
3584bf215546Sopenharmony_ci         value = fui(ice->state.default_outer_level[i]);
3585bf215546Sopenharmony_ci      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {
3586bf215546Sopenharmony_ci         value = fui(ice->state.default_inner_level[0]);
3587bf215546Sopenharmony_ci      } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
3588bf215546Sopenharmony_ci         value = fui(ice->state.default_inner_level[1]);
3589bf215546Sopenharmony_ci      } else if (sysval >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
3590bf215546Sopenharmony_ci                 sysval <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {
3591bf215546Sopenharmony_ci         unsigned i = sysval - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;
3592bf215546Sopenharmony_ci         value = ice->state.last_block[i];
3593bf215546Sopenharmony_ci      } else {
3594bf215546Sopenharmony_ci         assert(!"unhandled system value");
3595bf215546Sopenharmony_ci      }
3596bf215546Sopenharmony_ci
3597bf215546Sopenharmony_ci      *map++ = value;
3598bf215546Sopenharmony_ci   }
3599bf215546Sopenharmony_ci
3600bf215546Sopenharmony_ci   cbuf->buffer_size = upload_size;
3601bf215546Sopenharmony_ci   shs->sysvals_need_upload = false;
3602bf215546Sopenharmony_ci}
3603bf215546Sopenharmony_ci
3604bf215546Sopenharmony_ci/**
3605bf215546Sopenharmony_ci * The pipe->set_shader_buffers() driver hook.
3606bf215546Sopenharmony_ci *
3607bf215546Sopenharmony_ci * This binds SSBOs and ABOs.  Unfortunately, we need to stream out
3608bf215546Sopenharmony_ci * SURFACE_STATE here, as the buffer offset may change each time.
3609bf215546Sopenharmony_ci */
3610bf215546Sopenharmony_cistatic void
3611bf215546Sopenharmony_cicrocus_set_shader_buffers(struct pipe_context *ctx,
3612bf215546Sopenharmony_ci                          enum pipe_shader_type p_stage,
3613bf215546Sopenharmony_ci                          unsigned start_slot, unsigned count,
3614bf215546Sopenharmony_ci                          const struct pipe_shader_buffer *buffers,
3615bf215546Sopenharmony_ci                          unsigned writable_bitmask)
3616bf215546Sopenharmony_ci{
3617bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3618bf215546Sopenharmony_ci   gl_shader_stage stage = stage_from_pipe(p_stage);
3619bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
3620bf215546Sopenharmony_ci
3621bf215546Sopenharmony_ci   unsigned modified_bits = u_bit_consecutive(start_slot, count);
3622bf215546Sopenharmony_ci
3623bf215546Sopenharmony_ci   shs->bound_ssbos &= ~modified_bits;
3624bf215546Sopenharmony_ci   shs->writable_ssbos &= ~modified_bits;
3625bf215546Sopenharmony_ci   shs->writable_ssbos |= writable_bitmask << start_slot;
3626bf215546Sopenharmony_ci
3627bf215546Sopenharmony_ci   for (unsigned i = 0; i < count; i++) {
3628bf215546Sopenharmony_ci      if (buffers && buffers[i].buffer) {
3629bf215546Sopenharmony_ci         struct crocus_resource *res = (void *) buffers[i].buffer;
3630bf215546Sopenharmony_ci         struct pipe_shader_buffer *ssbo = &shs->ssbo[start_slot + i];
3631bf215546Sopenharmony_ci         pipe_resource_reference(&ssbo->buffer, &res->base.b);
3632bf215546Sopenharmony_ci         ssbo->buffer_offset = buffers[i].buffer_offset;
3633bf215546Sopenharmony_ci         ssbo->buffer_size =
3634bf215546Sopenharmony_ci            MIN2(buffers[i].buffer_size, res->bo->size - ssbo->buffer_offset);
3635bf215546Sopenharmony_ci
3636bf215546Sopenharmony_ci         shs->bound_ssbos |= 1 << (start_slot + i);
3637bf215546Sopenharmony_ci
3638bf215546Sopenharmony_ci         res->bind_history |= PIPE_BIND_SHADER_BUFFER;
3639bf215546Sopenharmony_ci         res->bind_stages |= 1 << stage;
3640bf215546Sopenharmony_ci
3641bf215546Sopenharmony_ci         util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset,
3642bf215546Sopenharmony_ci                        ssbo->buffer_offset + ssbo->buffer_size);
3643bf215546Sopenharmony_ci      } else {
3644bf215546Sopenharmony_ci         pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL);
3645bf215546Sopenharmony_ci      }
3646bf215546Sopenharmony_ci   }
3647bf215546Sopenharmony_ci
3648bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;
3649bf215546Sopenharmony_ci}
3650bf215546Sopenharmony_ci
/**
 * Generic CSO destructor: releases a state object with free().
 *
 * \param ctx    unused
 * \param state  the state object to free (NULL is accepted by free())
 */
static void
crocus_delete_state(struct pipe_context *ctx, void *state)
{
   (void) ctx;

   free(state);
}
3656bf215546Sopenharmony_ci
3657bf215546Sopenharmony_ci/**
3658bf215546Sopenharmony_ci * The pipe->set_vertex_buffers() driver hook.
3659bf215546Sopenharmony_ci *
3660bf215546Sopenharmony_ci * This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet.
3661bf215546Sopenharmony_ci */
3662bf215546Sopenharmony_cistatic void
3663bf215546Sopenharmony_cicrocus_set_vertex_buffers(struct pipe_context *ctx,
3664bf215546Sopenharmony_ci                          unsigned start_slot, unsigned count,
3665bf215546Sopenharmony_ci                          unsigned unbind_num_trailing_slots,
3666bf215546Sopenharmony_ci                          bool take_ownership,
3667bf215546Sopenharmony_ci                          const struct pipe_vertex_buffer *buffers)
3668bf215546Sopenharmony_ci{
3669bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3670bf215546Sopenharmony_ci   struct crocus_screen *screen = (struct crocus_screen *) ctx->screen;
3671bf215546Sopenharmony_ci   const unsigned padding =
3672bf215546Sopenharmony_ci      (GFX_VERx10 < 75 && screen->devinfo.platform != INTEL_PLATFORM_BYT) * 2;
3673bf215546Sopenharmony_ci   ice->state.bound_vertex_buffers &=
3674bf215546Sopenharmony_ci      ~u_bit_consecutive64(start_slot, count + unbind_num_trailing_slots);
3675bf215546Sopenharmony_ci
3676bf215546Sopenharmony_ci   util_set_vertex_buffers_mask(ice->state.vertex_buffers, &ice->state.bound_vertex_buffers,
3677bf215546Sopenharmony_ci                                buffers, start_slot, count, unbind_num_trailing_slots,
3678bf215546Sopenharmony_ci                                take_ownership);
3679bf215546Sopenharmony_ci
3680bf215546Sopenharmony_ci   for (unsigned i = 0; i < count; i++) {
3681bf215546Sopenharmony_ci      struct pipe_vertex_buffer *state =
3682bf215546Sopenharmony_ci         &ice->state.vertex_buffers[start_slot + i];
3683bf215546Sopenharmony_ci
3684bf215546Sopenharmony_ci      if (!state->is_user_buffer && state->buffer.resource) {
3685bf215546Sopenharmony_ci         struct crocus_resource *res = (void *)state->buffer.resource;
3686bf215546Sopenharmony_ci         res->bind_history |= PIPE_BIND_VERTEX_BUFFER;
3687bf215546Sopenharmony_ci      }
3688bf215546Sopenharmony_ci
3689bf215546Sopenharmony_ci      uint32_t end = 0;
3690bf215546Sopenharmony_ci      if (state->buffer.resource)
3691bf215546Sopenharmony_ci         end = state->buffer.resource->width0 + padding;
3692bf215546Sopenharmony_ci      ice->state.vb_end[start_slot + i] = end;
3693bf215546Sopenharmony_ci   }
3694bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS;
3695bf215546Sopenharmony_ci}
3696bf215546Sopenharmony_ci
3697bf215546Sopenharmony_ci#if GFX_VERx10 < 75
3698bf215546Sopenharmony_cistatic uint8_t get_wa_flags(enum isl_format format)
3699bf215546Sopenharmony_ci{
3700bf215546Sopenharmony_ci   uint8_t wa_flags = 0;
3701bf215546Sopenharmony_ci
3702bf215546Sopenharmony_ci   switch (format) {
3703bf215546Sopenharmony_ci   case ISL_FORMAT_R10G10B10A2_USCALED:
3704bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SCALE;
3705bf215546Sopenharmony_ci      break;
3706bf215546Sopenharmony_ci   case ISL_FORMAT_R10G10B10A2_SSCALED:
3707bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE;
3708bf215546Sopenharmony_ci      break;
3709bf215546Sopenharmony_ci   case ISL_FORMAT_R10G10B10A2_UNORM:
3710bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_NORMALIZE;
3711bf215546Sopenharmony_ci      break;
3712bf215546Sopenharmony_ci   case ISL_FORMAT_R10G10B10A2_SNORM:
3713bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE;
3714bf215546Sopenharmony_ci      break;
3715bf215546Sopenharmony_ci   case ISL_FORMAT_R10G10B10A2_SINT:
3716bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SIGN;
3717bf215546Sopenharmony_ci      break;
3718bf215546Sopenharmony_ci   case ISL_FORMAT_B10G10R10A2_USCALED:
3719bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA;
3720bf215546Sopenharmony_ci      break;
3721bf215546Sopenharmony_ci   case ISL_FORMAT_B10G10R10A2_SSCALED:
3722bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA;
3723bf215546Sopenharmony_ci      break;
3724bf215546Sopenharmony_ci   case ISL_FORMAT_B10G10R10A2_UNORM:
3725bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA;
3726bf215546Sopenharmony_ci      break;
3727bf215546Sopenharmony_ci   case ISL_FORMAT_B10G10R10A2_SNORM:
3728bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA;
3729bf215546Sopenharmony_ci      break;
3730bf215546Sopenharmony_ci   case ISL_FORMAT_B10G10R10A2_SINT:
3731bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_BGRA;
3732bf215546Sopenharmony_ci      break;
3733bf215546Sopenharmony_ci   case ISL_FORMAT_B10G10R10A2_UINT:
3734bf215546Sopenharmony_ci      wa_flags = BRW_ATTRIB_WA_BGRA;
3735bf215546Sopenharmony_ci      break;
3736bf215546Sopenharmony_ci   default:
3737bf215546Sopenharmony_ci      break;
3738bf215546Sopenharmony_ci   }
3739bf215546Sopenharmony_ci   return wa_flags;
3740bf215546Sopenharmony_ci}
3741bf215546Sopenharmony_ci#endif
3742bf215546Sopenharmony_ci
3743bf215546Sopenharmony_ci/**
3744bf215546Sopenharmony_ci * Gallium CSO for vertex elements.
3745bf215546Sopenharmony_ci */
3746bf215546Sopenharmony_cistruct crocus_vertex_element_state {
3747bf215546Sopenharmony_ci   uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
3748bf215546Sopenharmony_ci#if GFX_VER == 8
3749bf215546Sopenharmony_ci   uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)];
3750bf215546Sopenharmony_ci#endif
3751bf215546Sopenharmony_ci   uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)];
3752bf215546Sopenharmony_ci#if GFX_VER == 8
3753bf215546Sopenharmony_ci   uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)];
3754bf215546Sopenharmony_ci#endif
3755bf215546Sopenharmony_ci   uint32_t step_rate[16];
3756bf215546Sopenharmony_ci   uint8_t wa_flags[33];
3757bf215546Sopenharmony_ci   unsigned count;
3758bf215546Sopenharmony_ci};
3759bf215546Sopenharmony_ci
3760bf215546Sopenharmony_ci/**
3761bf215546Sopenharmony_ci * The pipe->create_vertex_elements() driver hook.
3762bf215546Sopenharmony_ci *
3763bf215546Sopenharmony_ci * This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS
3764bf215546Sopenharmony_ci * and 3DSTATE_VF_INSTANCING commands. The vertex_elements and vf_instancing
3765bf215546Sopenharmony_ci * arrays are ready to be emitted at draw time if no EdgeFlag or SGVs are
3766bf215546Sopenharmony_ci * needed. In these cases we will need information available at draw time.
3767bf215546Sopenharmony_ci * We setup edgeflag_ve and edgeflag_vfi as alternatives last
3768bf215546Sopenharmony_ci * 3DSTATE_VERTEX_ELEMENT and 3DSTATE_VF_INSTANCING that can be used at
3769bf215546Sopenharmony_ci * draw time if we detect that EdgeFlag is needed by the Vertex Shader.
3770bf215546Sopenharmony_ci */
3771bf215546Sopenharmony_cistatic void *
3772bf215546Sopenharmony_cicrocus_create_vertex_elements(struct pipe_context *ctx,
3773bf215546Sopenharmony_ci                              unsigned count,
3774bf215546Sopenharmony_ci                              const struct pipe_vertex_element *state)
3775bf215546Sopenharmony_ci{
3776bf215546Sopenharmony_ci   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
3777bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &screen->devinfo;
3778bf215546Sopenharmony_ci   struct crocus_vertex_element_state *cso =
3779bf215546Sopenharmony_ci      malloc(sizeof(struct crocus_vertex_element_state));
3780bf215546Sopenharmony_ci
3781bf215546Sopenharmony_ci   cso->count = count;
3782bf215546Sopenharmony_ci
3783bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) {
3784bf215546Sopenharmony_ci      ve.DWordLength =
3785bf215546Sopenharmony_ci         1 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2;
3786bf215546Sopenharmony_ci   }
3787bf215546Sopenharmony_ci
3788bf215546Sopenharmony_ci   uint32_t *ve_pack_dest = &cso->vertex_elements[1];
3789bf215546Sopenharmony_ci#if GFX_VER == 8
3790bf215546Sopenharmony_ci   uint32_t *vfi_pack_dest = cso->vf_instancing;
3791bf215546Sopenharmony_ci#endif
3792bf215546Sopenharmony_ci
3793bf215546Sopenharmony_ci   if (count == 0) {
3794bf215546Sopenharmony_ci      crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
3795bf215546Sopenharmony_ci         ve.Valid = true;
3796bf215546Sopenharmony_ci         ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
3797bf215546Sopenharmony_ci         ve.Component0Control = VFCOMP_STORE_0;
3798bf215546Sopenharmony_ci         ve.Component1Control = VFCOMP_STORE_0;
3799bf215546Sopenharmony_ci         ve.Component2Control = VFCOMP_STORE_0;
3800bf215546Sopenharmony_ci         ve.Component3Control = VFCOMP_STORE_1_FP;
3801bf215546Sopenharmony_ci      }
3802bf215546Sopenharmony_ci#if GFX_VER == 8
3803bf215546Sopenharmony_ci      crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
3804bf215546Sopenharmony_ci      }
3805bf215546Sopenharmony_ci#endif
3806bf215546Sopenharmony_ci   }
3807bf215546Sopenharmony_ci
3808bf215546Sopenharmony_ci   for (int i = 0; i < count; i++) {
3809bf215546Sopenharmony_ci      const struct crocus_format_info fmt =
3810bf215546Sopenharmony_ci         crocus_format_for_usage(devinfo, state[i].src_format, 0);
3811bf215546Sopenharmony_ci      unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,
3812bf215546Sopenharmony_ci                           VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };
3813bf215546Sopenharmony_ci      enum isl_format actual_fmt = fmt.fmt;
3814bf215546Sopenharmony_ci
3815bf215546Sopenharmony_ci#if GFX_VERx10 < 75
3816bf215546Sopenharmony_ci      cso->wa_flags[i] = get_wa_flags(fmt.fmt);
3817bf215546Sopenharmony_ci
3818bf215546Sopenharmony_ci      if (fmt.fmt == ISL_FORMAT_R10G10B10A2_USCALED ||
3819bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_R10G10B10A2_SSCALED ||
3820bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_R10G10B10A2_UNORM ||
3821bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_R10G10B10A2_SNORM ||
3822bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_R10G10B10A2_SINT ||
3823bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_B10G10R10A2_USCALED ||
3824bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_B10G10R10A2_SSCALED ||
3825bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_B10G10R10A2_UNORM ||
3826bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_B10G10R10A2_SNORM ||
3827bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_B10G10R10A2_UINT ||
3828bf215546Sopenharmony_ci          fmt.fmt == ISL_FORMAT_B10G10R10A2_SINT)
3829bf215546Sopenharmony_ci         actual_fmt = ISL_FORMAT_R10G10B10A2_UINT;
3830bf215546Sopenharmony_ci      if (fmt.fmt == ISL_FORMAT_R8G8B8_SINT)
3831bf215546Sopenharmony_ci         actual_fmt = ISL_FORMAT_R8G8B8A8_SINT;
3832bf215546Sopenharmony_ci      if (fmt.fmt == ISL_FORMAT_R8G8B8_UINT)
3833bf215546Sopenharmony_ci         actual_fmt = ISL_FORMAT_R8G8B8A8_UINT;
3834bf215546Sopenharmony_ci      if (fmt.fmt == ISL_FORMAT_R16G16B16_SINT)
3835bf215546Sopenharmony_ci         actual_fmt = ISL_FORMAT_R16G16B16A16_SINT;
3836bf215546Sopenharmony_ci      if (fmt.fmt == ISL_FORMAT_R16G16B16_UINT)
3837bf215546Sopenharmony_ci         actual_fmt = ISL_FORMAT_R16G16B16A16_UINT;
3838bf215546Sopenharmony_ci#endif
3839bf215546Sopenharmony_ci
3840bf215546Sopenharmony_ci      cso->step_rate[state[i].vertex_buffer_index] = state[i].instance_divisor;
3841bf215546Sopenharmony_ci
3842bf215546Sopenharmony_ci      switch (isl_format_get_num_channels(fmt.fmt)) {
3843bf215546Sopenharmony_ci      case 0: comp[0] = VFCOMP_STORE_0; FALLTHROUGH;
3844bf215546Sopenharmony_ci      case 1: comp[1] = VFCOMP_STORE_0; FALLTHROUGH;
3845bf215546Sopenharmony_ci      case 2: comp[2] = VFCOMP_STORE_0; FALLTHROUGH;
3846bf215546Sopenharmony_ci      case 3:
3847bf215546Sopenharmony_ci         comp[3] = isl_format_has_int_channel(fmt.fmt) ? VFCOMP_STORE_1_INT
3848bf215546Sopenharmony_ci            : VFCOMP_STORE_1_FP;
3849bf215546Sopenharmony_ci         break;
3850bf215546Sopenharmony_ci      }
3851bf215546Sopenharmony_ci      crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
3852bf215546Sopenharmony_ci#if GFX_VER >= 6
3853bf215546Sopenharmony_ci         ve.EdgeFlagEnable = false;
3854bf215546Sopenharmony_ci#endif
3855bf215546Sopenharmony_ci         ve.VertexBufferIndex = state[i].vertex_buffer_index;
3856bf215546Sopenharmony_ci         ve.Valid = true;
3857bf215546Sopenharmony_ci         ve.SourceElementOffset = state[i].src_offset;
3858bf215546Sopenharmony_ci         ve.SourceElementFormat = actual_fmt;
3859bf215546Sopenharmony_ci         ve.Component0Control = comp[0];
3860bf215546Sopenharmony_ci         ve.Component1Control = comp[1];
3861bf215546Sopenharmony_ci         ve.Component2Control = comp[2];
3862bf215546Sopenharmony_ci         ve.Component3Control = comp[3];
3863bf215546Sopenharmony_ci#if GFX_VER < 5
3864bf215546Sopenharmony_ci         ve.DestinationElementOffset = i * 4;
3865bf215546Sopenharmony_ci#endif
3866bf215546Sopenharmony_ci      }
3867bf215546Sopenharmony_ci
3868bf215546Sopenharmony_ci#if GFX_VER == 8
3869bf215546Sopenharmony_ci      crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
3870bf215546Sopenharmony_ci         vi.VertexElementIndex = i;
3871bf215546Sopenharmony_ci         vi.InstancingEnable = state[i].instance_divisor > 0;
3872bf215546Sopenharmony_ci         vi.InstanceDataStepRate = state[i].instance_divisor;
3873bf215546Sopenharmony_ci      }
3874bf215546Sopenharmony_ci#endif
3875bf215546Sopenharmony_ci      ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
3876bf215546Sopenharmony_ci#if GFX_VER == 8
3877bf215546Sopenharmony_ci      vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length);
3878bf215546Sopenharmony_ci#endif
3879bf215546Sopenharmony_ci   }
3880bf215546Sopenharmony_ci
3881bf215546Sopenharmony_ci   /* An alternative version of the last VE and VFI is stored so it
3882bf215546Sopenharmony_ci    * can be used at draw time in case Vertex Shader uses EdgeFlag
3883bf215546Sopenharmony_ci    */
3884bf215546Sopenharmony_ci   if (count) {
3885bf215546Sopenharmony_ci      const unsigned edgeflag_index = count - 1;
3886bf215546Sopenharmony_ci      const struct crocus_format_info fmt =
3887bf215546Sopenharmony_ci         crocus_format_for_usage(devinfo, state[edgeflag_index].src_format, 0);
3888bf215546Sopenharmony_ci      crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), cso->edgeflag_ve, ve) {
3889bf215546Sopenharmony_ci#if GFX_VER >= 6
3890bf215546Sopenharmony_ci         ve.EdgeFlagEnable = true;
3891bf215546Sopenharmony_ci#endif
3892bf215546Sopenharmony_ci         ve.VertexBufferIndex = state[edgeflag_index].vertex_buffer_index;
3893bf215546Sopenharmony_ci         ve.Valid = true;
3894bf215546Sopenharmony_ci         ve.SourceElementOffset = state[edgeflag_index].src_offset;
3895bf215546Sopenharmony_ci         ve.SourceElementFormat = fmt.fmt;
3896bf215546Sopenharmony_ci         ve.Component0Control = VFCOMP_STORE_SRC;
3897bf215546Sopenharmony_ci         ve.Component1Control = VFCOMP_STORE_0;
3898bf215546Sopenharmony_ci         ve.Component2Control = VFCOMP_STORE_0;
3899bf215546Sopenharmony_ci         ve.Component3Control = VFCOMP_STORE_0;
3900bf215546Sopenharmony_ci      }
3901bf215546Sopenharmony_ci#if GFX_VER == 8
3902bf215546Sopenharmony_ci      crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->edgeflag_vfi, vi) {
3903bf215546Sopenharmony_ci         /* The vi.VertexElementIndex of the EdgeFlag Vertex Element is filled
3904bf215546Sopenharmony_ci          * at draw time, as it should change if SGVs are emitted.
3905bf215546Sopenharmony_ci          */
3906bf215546Sopenharmony_ci         vi.InstancingEnable = state[edgeflag_index].instance_divisor > 0;
3907bf215546Sopenharmony_ci         vi.InstanceDataStepRate = state[edgeflag_index].instance_divisor;
3908bf215546Sopenharmony_ci      }
3909bf215546Sopenharmony_ci#endif
3910bf215546Sopenharmony_ci   }
3911bf215546Sopenharmony_ci
3912bf215546Sopenharmony_ci   return cso;
3913bf215546Sopenharmony_ci}
3914bf215546Sopenharmony_ci
3915bf215546Sopenharmony_ci/**
3916bf215546Sopenharmony_ci * The pipe->bind_vertex_elements_state() driver hook.
3917bf215546Sopenharmony_ci */
3918bf215546Sopenharmony_cistatic void
3919bf215546Sopenharmony_cicrocus_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
3920bf215546Sopenharmony_ci{
3921bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
3922bf215546Sopenharmony_ci#if GFX_VER == 8
3923bf215546Sopenharmony_ci   struct crocus_vertex_element_state *old_cso = ice->state.cso_vertex_elements;
3924bf215546Sopenharmony_ci   struct crocus_vertex_element_state *new_cso = state;
3925bf215546Sopenharmony_ci
3926bf215546Sopenharmony_ci   if (new_cso && cso_changed(count))
3927bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS;
3928bf215546Sopenharmony_ci#endif
3929bf215546Sopenharmony_ci   ice->state.cso_vertex_elements = state;
3930bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS;
3931bf215546Sopenharmony_ci   ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_VERTEX_ELEMENTS];
3932bf215546Sopenharmony_ci}
3933bf215546Sopenharmony_ci
3934bf215546Sopenharmony_ci#if GFX_VER >= 6
/**
 * Accumulator over a window of 64-bit primitive-count snapshots.
 */
struct crocus_streamout_counter {
   /** Byte offset of the first snapshot in the window. */
   uint32_t offset_start;
   /** Byte offset just past the last snapshot in the window. */
   uint32_t offset_end;

   /** Running sum of the differences between paired snapshots. */
   uint64_t accum;
};
3941bf215546Sopenharmony_ci
3942bf215546Sopenharmony_ci/**
3943bf215546Sopenharmony_ci * Gallium CSO for stream output (transform feedback) targets.
3944bf215546Sopenharmony_ci */
3945bf215546Sopenharmony_cistruct crocus_stream_output_target {
3946bf215546Sopenharmony_ci   struct pipe_stream_output_target base;
3947bf215546Sopenharmony_ci
3948bf215546Sopenharmony_ci   /** Stride (bytes-per-vertex) during this transform feedback operation */
3949bf215546Sopenharmony_ci   uint16_t stride;
3950bf215546Sopenharmony_ci
3951bf215546Sopenharmony_ci   /** Has 3DSTATE_SO_BUFFER actually been emitted, zeroing the offsets? */
3952bf215546Sopenharmony_ci   bool zeroed;
3953bf215546Sopenharmony_ci
3954bf215546Sopenharmony_ci   struct crocus_resource *offset_res;
3955bf215546Sopenharmony_ci   uint32_t offset_offset;
3956bf215546Sopenharmony_ci
3957bf215546Sopenharmony_ci#if GFX_VER == 6
3958bf215546Sopenharmony_ci   void *prim_map;
3959bf215546Sopenharmony_ci   struct crocus_streamout_counter prev_count;
3960bf215546Sopenharmony_ci   struct crocus_streamout_counter count;
3961bf215546Sopenharmony_ci#endif
3962bf215546Sopenharmony_ci#if GFX_VER == 8
3963bf215546Sopenharmony_ci   /** Does the next 3DSTATE_SO_BUFFER need to zero the offsets? */
3964bf215546Sopenharmony_ci   bool zero_offset;
3965bf215546Sopenharmony_ci#endif
3966bf215546Sopenharmony_ci};
3967bf215546Sopenharmony_ci
#if GFX_VER >= 7
/**
 * Read the 32-bit value stored in a target's offset buffer and divide it
 * by the target's stride.
 *
 * \param so  the stream output target (a crocus_stream_output_target)
 * \return the stored value divided by tgt->stride
 */
static uint32_t
crocus_get_so_offset(struct pipe_stream_output_target *so)
{
   struct crocus_stream_output_target *tgt = (void *)so;
   struct pipe_context *pctx = so->context;
   struct pipe_transfer *xfer;
   struct pipe_box box;

   /* Map just the four bytes holding the stored value. */
   u_box_1d(tgt->offset_offset, 4, &box);
   void *map = pctx->buffer_map(pctx, &tgt->offset_res->base.b,
                                0, PIPE_MAP_DIRECTLY,
                                &box, &xfer);
   assert(map);

   const uint32_t raw = *(uint32_t *)map;
   pctx->buffer_unmap(pctx, xfer);

   return raw / tgt->stride;
}
#endif
3987bf215546Sopenharmony_ci
#if GFX_VER == 6
/* Defined further down; needed here by crocus_get_so_offset(). */
static void
compute_vertices_written_so_far(struct crocus_context *ice,
                                struct crocus_stream_output_target *tgt,
                                struct crocus_streamout_counter *counter,
                                uint64_t *svbi);

/**
 * Return the vertex count computed from the target's prev_count counter.
 *
 * \param so  the stream output target (a crocus_stream_output_target)
 * \return the computed count, converted to 32 bits
 */
static uint32_t
crocus_get_so_offset(struct pipe_stream_output_target *so)
{
   struct crocus_stream_output_target *tgt = (void *)so;
   struct crocus_context *ice = (void *)so->context;
   uint64_t written;

   compute_vertices_written_so_far(ice, tgt, &tgt->prev_count, &written);

   return written;
}
#endif
4006bf215546Sopenharmony_ci
4007bf215546Sopenharmony_ci/**
4008bf215546Sopenharmony_ci * The pipe->create_stream_output_target() driver hook.
4009bf215546Sopenharmony_ci *
4010bf215546Sopenharmony_ci * "Target" here refers to a destination buffer.  We translate this into
4011bf215546Sopenharmony_ci * a 3DSTATE_SO_BUFFER packet.  We can handle most fields, but don't yet
4012bf215546Sopenharmony_ci * know which buffer this represents, or whether we ought to zero the
4013bf215546Sopenharmony_ci * write-offsets, or append.  Those are handled in the set() hook.
4014bf215546Sopenharmony_ci */
4015bf215546Sopenharmony_cistatic struct pipe_stream_output_target *
4016bf215546Sopenharmony_cicrocus_create_stream_output_target(struct pipe_context *ctx,
4017bf215546Sopenharmony_ci                                   struct pipe_resource *p_res,
4018bf215546Sopenharmony_ci                                   unsigned buffer_offset,
4019bf215546Sopenharmony_ci                                   unsigned buffer_size)
4020bf215546Sopenharmony_ci{
4021bf215546Sopenharmony_ci   struct crocus_resource *res = (void *) p_res;
4022bf215546Sopenharmony_ci   struct crocus_stream_output_target *cso = calloc(1, sizeof(*cso));
4023bf215546Sopenharmony_ci   if (!cso)
4024bf215546Sopenharmony_ci      return NULL;
4025bf215546Sopenharmony_ci
4026bf215546Sopenharmony_ci   res->bind_history |= PIPE_BIND_STREAM_OUTPUT;
4027bf215546Sopenharmony_ci
4028bf215546Sopenharmony_ci   pipe_reference_init(&cso->base.reference, 1);
4029bf215546Sopenharmony_ci   pipe_resource_reference(&cso->base.buffer, p_res);
4030bf215546Sopenharmony_ci   cso->base.buffer_offset = buffer_offset;
4031bf215546Sopenharmony_ci   cso->base.buffer_size = buffer_size;
4032bf215546Sopenharmony_ci   cso->base.context = ctx;
4033bf215546Sopenharmony_ci
4034bf215546Sopenharmony_ci   util_range_add(&res->base.b, &res->valid_buffer_range, buffer_offset,
4035bf215546Sopenharmony_ci                  buffer_offset + buffer_size);
4036bf215546Sopenharmony_ci#if GFX_VER >= 7
4037bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
4038bf215546Sopenharmony_ci   void *temp;
4039bf215546Sopenharmony_ci   u_upload_alloc(ice->ctx.stream_uploader, 0, sizeof(uint32_t), 4,
4040bf215546Sopenharmony_ci                  &cso->offset_offset,
4041bf215546Sopenharmony_ci                  (struct pipe_resource **)&cso->offset_res,
4042bf215546Sopenharmony_ci                  &temp);
4043bf215546Sopenharmony_ci#endif
4044bf215546Sopenharmony_ci
4045bf215546Sopenharmony_ci   return &cso->base;
4046bf215546Sopenharmony_ci}
4047bf215546Sopenharmony_ci
4048bf215546Sopenharmony_cistatic void
4049bf215546Sopenharmony_cicrocus_stream_output_target_destroy(struct pipe_context *ctx,
4050bf215546Sopenharmony_ci                                    struct pipe_stream_output_target *state)
4051bf215546Sopenharmony_ci{
4052bf215546Sopenharmony_ci   struct crocus_stream_output_target *cso = (void *) state;
4053bf215546Sopenharmony_ci
4054bf215546Sopenharmony_ci   pipe_resource_reference((struct pipe_resource **)&cso->offset_res, NULL);
4055bf215546Sopenharmony_ci   pipe_resource_reference(&cso->base.buffer, NULL);
4056bf215546Sopenharmony_ci
4057bf215546Sopenharmony_ci   free(cso);
4058bf215546Sopenharmony_ci}
4059bf215546Sopenharmony_ci
/* Register stored to memory by crocus_stream_store_prims_written(). */
#define GEN6_SO_NUM_PRIMS_WRITTEN       0x2288
/* Register offset for stream output buffer n; consecutive buffers are four
 * bytes apart.
 */
#define GEN7_SO_WRITE_OFFSET(n)         (0x5280 + (n) * 4)
4062bf215546Sopenharmony_ci
4063bf215546Sopenharmony_ci#if GFX_VER == 6
4064bf215546Sopenharmony_cistatic void
4065bf215546Sopenharmony_ciaggregate_stream_counter(struct crocus_batch *batch, struct crocus_stream_output_target *tgt,
4066bf215546Sopenharmony_ci                         struct crocus_streamout_counter *counter)
4067bf215546Sopenharmony_ci{
4068bf215546Sopenharmony_ci   uint64_t *prim_counts = tgt->prim_map;
4069bf215546Sopenharmony_ci
4070bf215546Sopenharmony_ci   if (crocus_batch_references(batch, tgt->offset_res->bo)) {
4071bf215546Sopenharmony_ci      struct pipe_fence_handle *out_fence = NULL;
4072bf215546Sopenharmony_ci      batch->ice->ctx.flush(&batch->ice->ctx, &out_fence, 0);
4073bf215546Sopenharmony_ci      batch->screen->base.fence_finish(&batch->screen->base, &batch->ice->ctx, out_fence, UINT64_MAX);
4074bf215546Sopenharmony_ci      batch->screen->base.fence_reference(&batch->screen->base, &out_fence, NULL);
4075bf215546Sopenharmony_ci   }
4076bf215546Sopenharmony_ci
4077bf215546Sopenharmony_ci   for (unsigned i = counter->offset_start / sizeof(uint64_t); i < counter->offset_end / sizeof(uint64_t); i += 2) {
4078bf215546Sopenharmony_ci      counter->accum += prim_counts[i + 1] - prim_counts[i];
4079bf215546Sopenharmony_ci   }
4080bf215546Sopenharmony_ci   tgt->count.offset_start = tgt->count.offset_end = 0;
4081bf215546Sopenharmony_ci}
4082bf215546Sopenharmony_ci
4083bf215546Sopenharmony_cistatic void
4084bf215546Sopenharmony_cicrocus_stream_store_prims_written(struct crocus_batch *batch,
4085bf215546Sopenharmony_ci                                  struct crocus_stream_output_target *tgt)
4086bf215546Sopenharmony_ci{
4087bf215546Sopenharmony_ci   if (!tgt->offset_res) {
4088bf215546Sopenharmony_ci      u_upload_alloc(batch->ice->ctx.stream_uploader, 0, 4096, 4,
4089bf215546Sopenharmony_ci                     &tgt->offset_offset,
4090bf215546Sopenharmony_ci                     (struct pipe_resource **)&tgt->offset_res,
4091bf215546Sopenharmony_ci                     &tgt->prim_map);
4092bf215546Sopenharmony_ci      tgt->count.offset_start = tgt->count.offset_end = 0;
4093bf215546Sopenharmony_ci   }
4094bf215546Sopenharmony_ci
4095bf215546Sopenharmony_ci   if (tgt->count.offset_end + 16 >= 4096) {
4096bf215546Sopenharmony_ci      aggregate_stream_counter(batch, tgt, &tgt->prev_count);
4097bf215546Sopenharmony_ci      aggregate_stream_counter(batch, tgt, &tgt->count);
4098bf215546Sopenharmony_ci   }
4099bf215546Sopenharmony_ci
4100bf215546Sopenharmony_ci   crocus_emit_mi_flush(batch);
4101bf215546Sopenharmony_ci   crocus_store_register_mem64(batch, GEN6_SO_NUM_PRIMS_WRITTEN,
4102bf215546Sopenharmony_ci                               tgt->offset_res->bo,
4103bf215546Sopenharmony_ci                               tgt->count.offset_end + tgt->offset_offset, false);
4104bf215546Sopenharmony_ci   tgt->count.offset_end += 8;
4105bf215546Sopenharmony_ci}
4106bf215546Sopenharmony_ci
4107bf215546Sopenharmony_cistatic void
4108bf215546Sopenharmony_cicompute_vertices_written_so_far(struct crocus_context *ice,
4109bf215546Sopenharmony_ci                                struct crocus_stream_output_target *tgt,
4110bf215546Sopenharmony_ci                                struct crocus_streamout_counter *counter,
4111bf215546Sopenharmony_ci                                uint64_t *svbi)
4112bf215546Sopenharmony_ci{
4113bf215546Sopenharmony_ci   //TODO vertices per prim
4114bf215546Sopenharmony_ci   aggregate_stream_counter(&ice->batches[0], tgt, counter);
4115bf215546Sopenharmony_ci
4116bf215546Sopenharmony_ci   *svbi = counter->accum * ice->state.last_xfb_verts_per_prim;
4117bf215546Sopenharmony_ci}
4118bf215546Sopenharmony_ci#endif
4119bf215546Sopenharmony_ci/**
4120bf215546Sopenharmony_ci * The pipe->set_stream_output_targets() driver hook.
4121bf215546Sopenharmony_ci *
4122bf215546Sopenharmony_ci * At this point, we know which targets are bound to a particular index,
4123bf215546Sopenharmony_ci * and also whether we want to append or start over.  We can finish the
4124bf215546Sopenharmony_ci * 3DSTATE_SO_BUFFER packets we started earlier.
4125bf215546Sopenharmony_ci */
4126bf215546Sopenharmony_cistatic void
4127bf215546Sopenharmony_cicrocus_set_stream_output_targets(struct pipe_context *ctx,
4128bf215546Sopenharmony_ci                                 unsigned num_targets,
4129bf215546Sopenharmony_ci                                 struct pipe_stream_output_target **targets,
4130bf215546Sopenharmony_ci                                 const unsigned *offsets)
4131bf215546Sopenharmony_ci{
4132bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
4133bf215546Sopenharmony_ci   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
4134bf215546Sopenharmony_ci   struct pipe_stream_output_target *old_tgt[4] = { NULL, NULL, NULL, NULL };
4135bf215546Sopenharmony_ci   const bool active = num_targets > 0;
4136bf215546Sopenharmony_ci   if (ice->state.streamout_active != active) {
4137bf215546Sopenharmony_ci      ice->state.streamout_active = active;
4138bf215546Sopenharmony_ci#if GFX_VER >= 7
4139bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;
4140bf215546Sopenharmony_ci#else
4141bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
4142bf215546Sopenharmony_ci#endif
4143bf215546Sopenharmony_ci
4144bf215546Sopenharmony_ci      /* We only emit 3DSTATE_SO_DECL_LIST when streamout is active, because
4145bf215546Sopenharmony_ci       * it's a non-pipelined command.  If we're switching streamout on, we
4146bf215546Sopenharmony_ci       * may have missed emitting it earlier, so do so now.  (We're already
4147bf215546Sopenharmony_ci       * taking a stall to update 3DSTATE_SO_BUFFERS anyway...)
4148bf215546Sopenharmony_ci       */
4149bf215546Sopenharmony_ci      if (active) {
4150bf215546Sopenharmony_ci#if GFX_VER >= 7
4151bf215546Sopenharmony_ci         ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST;
4152bf215546Sopenharmony_ci#endif
4153bf215546Sopenharmony_ci      } else {
4154bf215546Sopenharmony_ci         uint32_t flush = 0;
4155bf215546Sopenharmony_ci         for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
4156bf215546Sopenharmony_ci            struct crocus_stream_output_target *tgt =
4157bf215546Sopenharmony_ci               (void *) ice->state.so_target[i];
4158bf215546Sopenharmony_ci            if (tgt) {
4159bf215546Sopenharmony_ci               struct crocus_resource *res = (void *) tgt->base.buffer;
4160bf215546Sopenharmony_ci
4161bf215546Sopenharmony_ci               flush |= crocus_flush_bits_for_history(res);
4162bf215546Sopenharmony_ci               crocus_dirty_for_history(ice, res);
4163bf215546Sopenharmony_ci            }
4164bf215546Sopenharmony_ci         }
4165bf215546Sopenharmony_ci         crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
4166bf215546Sopenharmony_ci                                        "make streamout results visible", flush);
4167bf215546Sopenharmony_ci      }
4168bf215546Sopenharmony_ci   }
4169bf215546Sopenharmony_ci
4170bf215546Sopenharmony_ci   ice->state.so_targets = num_targets;
4171bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++) {
4172bf215546Sopenharmony_ci      pipe_so_target_reference(&old_tgt[i], ice->state.so_target[i]);
4173bf215546Sopenharmony_ci      pipe_so_target_reference(&ice->state.so_target[i],
4174bf215546Sopenharmony_ci                               i < num_targets ? targets[i] : NULL);
4175bf215546Sopenharmony_ci   }
4176bf215546Sopenharmony_ci
4177bf215546Sopenharmony_ci#if GFX_VER == 6
4178bf215546Sopenharmony_ci   bool stored_num_prims = false;
4179bf215546Sopenharmony_ci   for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
4180bf215546Sopenharmony_ci      if (num_targets) {
4181bf215546Sopenharmony_ci         struct crocus_stream_output_target *tgt =
4182bf215546Sopenharmony_ci            (void *) ice->state.so_target[i];
4183bf215546Sopenharmony_ci
4184bf215546Sopenharmony_ci         if (!tgt)
4185bf215546Sopenharmony_ci            continue;
4186bf215546Sopenharmony_ci         if (offsets[i] == 0) {
4187bf215546Sopenharmony_ci            // This means that we're supposed to ignore anything written to
4188bf215546Sopenharmony_ci            // the buffer before. We can do this by just clearing out the
4189bf215546Sopenharmony_ci            // count of writes to the prim count buffer.
4190bf215546Sopenharmony_ci            tgt->count.offset_start = tgt->count.offset_end;
4191bf215546Sopenharmony_ci            tgt->count.accum = 0;
4192bf215546Sopenharmony_ci            ice->state.svbi = 0;
4193bf215546Sopenharmony_ci         } else {
4194bf215546Sopenharmony_ci            if (tgt->offset_res) {
4195bf215546Sopenharmony_ci               compute_vertices_written_so_far(ice, tgt, &tgt->count, &ice->state.svbi);
4196bf215546Sopenharmony_ci               tgt->count.offset_start = tgt->count.offset_end;
4197bf215546Sopenharmony_ci            }
4198bf215546Sopenharmony_ci         }
4199bf215546Sopenharmony_ci
4200bf215546Sopenharmony_ci         if (!stored_num_prims) {
4201bf215546Sopenharmony_ci            crocus_stream_store_prims_written(batch, tgt);
4202bf215546Sopenharmony_ci            stored_num_prims = true;
4203bf215546Sopenharmony_ci         }
4204bf215546Sopenharmony_ci      } else {
4205bf215546Sopenharmony_ci         struct crocus_stream_output_target *tgt =
4206bf215546Sopenharmony_ci            (void *) old_tgt[i];
4207bf215546Sopenharmony_ci         if (tgt) {
4208bf215546Sopenharmony_ci            if (!stored_num_prims) {
4209bf215546Sopenharmony_ci               crocus_stream_store_prims_written(batch, tgt);
4210bf215546Sopenharmony_ci               stored_num_prims = true;
4211bf215546Sopenharmony_ci            }
4212bf215546Sopenharmony_ci
4213bf215546Sopenharmony_ci            if (tgt->offset_res) {
4214bf215546Sopenharmony_ci               tgt->prev_count = tgt->count;
4215bf215546Sopenharmony_ci            }
4216bf215546Sopenharmony_ci         }
4217bf215546Sopenharmony_ci      }
4218bf215546Sopenharmony_ci      pipe_so_target_reference(&old_tgt[i], NULL);
4219bf215546Sopenharmony_ci   }
4220bf215546Sopenharmony_ci   ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS;
4221bf215546Sopenharmony_ci#else
4222bf215546Sopenharmony_ci   for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
4223bf215546Sopenharmony_ci      if (num_targets) {
4224bf215546Sopenharmony_ci         struct crocus_stream_output_target *tgt =
4225bf215546Sopenharmony_ci            (void *) ice->state.so_target[i];
4226bf215546Sopenharmony_ci
4227bf215546Sopenharmony_ci         if (offsets[i] == 0) {
4228bf215546Sopenharmony_ci#if GFX_VER == 8
4229bf215546Sopenharmony_ci            if (tgt)
4230bf215546Sopenharmony_ci               tgt->zero_offset = true;
4231bf215546Sopenharmony_ci#endif
4232bf215546Sopenharmony_ci            crocus_load_register_imm32(batch, GEN7_SO_WRITE_OFFSET(i), 0);
4233bf215546Sopenharmony_ci         }
4234bf215546Sopenharmony_ci         else if (tgt)
4235bf215546Sopenharmony_ci            crocus_load_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i),
4236bf215546Sopenharmony_ci                                       tgt->offset_res->bo,
4237bf215546Sopenharmony_ci                                       tgt->offset_offset);
4238bf215546Sopenharmony_ci      } else {
4239bf215546Sopenharmony_ci         struct crocus_stream_output_target *tgt =
4240bf215546Sopenharmony_ci            (void *) old_tgt[i];
4241bf215546Sopenharmony_ci         if (tgt)
4242bf215546Sopenharmony_ci            crocus_store_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i),
4243bf215546Sopenharmony_ci                                        tgt->offset_res->bo,
4244bf215546Sopenharmony_ci                                        tgt->offset_offset, false);
4245bf215546Sopenharmony_ci      }
4246bf215546Sopenharmony_ci      pipe_so_target_reference(&old_tgt[i], NULL);
4247bf215546Sopenharmony_ci   }
4248bf215546Sopenharmony_ci#endif
4249bf215546Sopenharmony_ci   /* No need to update 3DSTATE_SO_BUFFER unless SOL is active. */
4250bf215546Sopenharmony_ci   if (!active)
4251bf215546Sopenharmony_ci      return;
4252bf215546Sopenharmony_ci#if GFX_VER >= 7
4253bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;
4254bf215546Sopenharmony_ci#elif GFX_VER == 6
4255bf215546Sopenharmony_ci   ice->state.dirty |= CROCUS_DIRTY_GEN6_SVBI;
4256bf215546Sopenharmony_ci#endif
4257bf215546Sopenharmony_ci}
4258bf215546Sopenharmony_ci
4259bf215546Sopenharmony_ci#endif
4260bf215546Sopenharmony_ci
4261bf215546Sopenharmony_ci#if GFX_VER >= 7
4262bf215546Sopenharmony_ci/**
4263bf215546Sopenharmony_ci * An crocus-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and
4264bf215546Sopenharmony_ci * 3DSTATE_STREAMOUT packets.
4265bf215546Sopenharmony_ci *
4266bf215546Sopenharmony_ci * 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout
4267bf215546Sopenharmony_ci * hardware to record.  We can create it entirely based on the shader, with
4268bf215546Sopenharmony_ci * no dynamic state dependencies.
4269bf215546Sopenharmony_ci *
4270bf215546Sopenharmony_ci * 3DSTATE_STREAMOUT is an annoying mix of shader-based information and
4271bf215546Sopenharmony_ci * state-based settings.  We capture the shader-related ones here, and merge
4272bf215546Sopenharmony_ci * the rest in at draw time.
4273bf215546Sopenharmony_ci */
4274bf215546Sopenharmony_cistatic uint32_t *
4275bf215546Sopenharmony_cicrocus_create_so_decl_list(const struct pipe_stream_output_info *info,
4276bf215546Sopenharmony_ci                           const struct brw_vue_map *vue_map)
4277bf215546Sopenharmony_ci{
4278bf215546Sopenharmony_ci   struct GENX(SO_DECL) so_decl[PIPE_MAX_VERTEX_STREAMS][128];
4279bf215546Sopenharmony_ci   int buffer_mask[PIPE_MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
4280bf215546Sopenharmony_ci   int next_offset[PIPE_MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
4281bf215546Sopenharmony_ci   int decls[PIPE_MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
4282bf215546Sopenharmony_ci   int max_decls = 0;
4283bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= PIPE_MAX_SO_OUTPUTS);
4284bf215546Sopenharmony_ci
4285bf215546Sopenharmony_ci   memset(so_decl, 0, sizeof(so_decl));
4286bf215546Sopenharmony_ci
4287bf215546Sopenharmony_ci   /* Construct the list of SO_DECLs to be emitted.  The formatting of the
4288bf215546Sopenharmony_ci    * command feels strange -- each dword pair contains a SO_DECL per stream.
4289bf215546Sopenharmony_ci    */
4290bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->num_outputs; i++) {
4291bf215546Sopenharmony_ci      const struct pipe_stream_output *output = &info->output[i];
4292bf215546Sopenharmony_ci      const int buffer = output->output_buffer;
4293bf215546Sopenharmony_ci      const int varying = output->register_index;
4294bf215546Sopenharmony_ci      const unsigned stream_id = output->stream;
4295bf215546Sopenharmony_ci      assert(stream_id < PIPE_MAX_VERTEX_STREAMS);
4296bf215546Sopenharmony_ci
4297bf215546Sopenharmony_ci      buffer_mask[stream_id] |= 1 << buffer;
4298bf215546Sopenharmony_ci
4299bf215546Sopenharmony_ci      assert(vue_map->varying_to_slot[varying] >= 0);
4300bf215546Sopenharmony_ci
4301bf215546Sopenharmony_ci      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
4302bf215546Sopenharmony_ci       * array.  Instead, it simply increments DstOffset for the following
4303bf215546Sopenharmony_ci       * input by the number of components that should be skipped.
4304bf215546Sopenharmony_ci       *
4305bf215546Sopenharmony_ci       * Our hardware is unusual in that it requires us to program SO_DECLs
4306bf215546Sopenharmony_ci       * for fake "hole" components, rather than simply taking the offset
4307bf215546Sopenharmony_ci       * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
4308bf215546Sopenharmony_ci       * program as many size = 4 holes as we can, then a final hole to
4309bf215546Sopenharmony_ci       * accommodate the final 1, 2, or 3 remaining.
4310bf215546Sopenharmony_ci       */
4311bf215546Sopenharmony_ci      int skip_components = output->dst_offset - next_offset[buffer];
4312bf215546Sopenharmony_ci
4313bf215546Sopenharmony_ci      while (skip_components > 0) {
4314bf215546Sopenharmony_ci         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
4315bf215546Sopenharmony_ci            .HoleFlag = 1,
4316bf215546Sopenharmony_ci            .OutputBufferSlot = output->output_buffer,
4317bf215546Sopenharmony_ci            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
4318bf215546Sopenharmony_ci         };
4319bf215546Sopenharmony_ci         skip_components -= 4;
4320bf215546Sopenharmony_ci      }
4321bf215546Sopenharmony_ci
4322bf215546Sopenharmony_ci      next_offset[buffer] = output->dst_offset + output->num_components;
4323bf215546Sopenharmony_ci
4324bf215546Sopenharmony_ci      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
4325bf215546Sopenharmony_ci         .OutputBufferSlot = output->output_buffer,
4326bf215546Sopenharmony_ci         .RegisterIndex = vue_map->varying_to_slot[varying],
4327bf215546Sopenharmony_ci         .ComponentMask =
4328bf215546Sopenharmony_ci            ((1 << output->num_components) - 1) << output->start_component,
4329bf215546Sopenharmony_ci      };
4330bf215546Sopenharmony_ci
4331bf215546Sopenharmony_ci      if (decls[stream_id] > max_decls)
4332bf215546Sopenharmony_ci         max_decls = decls[stream_id];
4333bf215546Sopenharmony_ci   }
4334bf215546Sopenharmony_ci
4335bf215546Sopenharmony_ci   unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls);
4336bf215546Sopenharmony_ci   uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords);
4337bf215546Sopenharmony_ci   uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length);
4338bf215546Sopenharmony_ci
4339bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) {
4340bf215546Sopenharmony_ci      int urb_entry_read_offset = 0;
4341bf215546Sopenharmony_ci      int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
4342bf215546Sopenharmony_ci         urb_entry_read_offset;
4343bf215546Sopenharmony_ci
4344bf215546Sopenharmony_ci      /* We always read the whole vertex.  This could be reduced at some
4345bf215546Sopenharmony_ci       * point by reading less and offsetting the register index in the
4346bf215546Sopenharmony_ci       * SO_DECLs.
4347bf215546Sopenharmony_ci       */
4348bf215546Sopenharmony_ci      sol.Stream0VertexReadOffset = urb_entry_read_offset;
4349bf215546Sopenharmony_ci      sol.Stream0VertexReadLength = urb_entry_read_length - 1;
4350bf215546Sopenharmony_ci      sol.Stream1VertexReadOffset = urb_entry_read_offset;
4351bf215546Sopenharmony_ci      sol.Stream1VertexReadLength = urb_entry_read_length - 1;
4352bf215546Sopenharmony_ci      sol.Stream2VertexReadOffset = urb_entry_read_offset;
4353bf215546Sopenharmony_ci      sol.Stream2VertexReadLength = urb_entry_read_length - 1;
4354bf215546Sopenharmony_ci      sol.Stream3VertexReadOffset = urb_entry_read_offset;
4355bf215546Sopenharmony_ci      sol.Stream3VertexReadLength = urb_entry_read_length - 1;
4356bf215546Sopenharmony_ci
4357bf215546Sopenharmony_ci      // TODO: Double-check that stride == 0 means no buffer. Probably this
4358bf215546Sopenharmony_ci      // needs to go elsewhere, where the buffer enable stuff is actually
4359bf215546Sopenharmony_ci      // known.
4360bf215546Sopenharmony_ci#if GFX_VER < 8
4361bf215546Sopenharmony_ci      sol.SOBufferEnable0 = !!info->stride[0];
4362bf215546Sopenharmony_ci      sol.SOBufferEnable1 = !!info->stride[1];
4363bf215546Sopenharmony_ci      sol.SOBufferEnable2 = !!info->stride[2];
4364bf215546Sopenharmony_ci      sol.SOBufferEnable3 = !!info->stride[3];
4365bf215546Sopenharmony_ci#else
4366bf215546Sopenharmony_ci      /* Set buffer pitches; 0 means unbound. */
4367bf215546Sopenharmony_ci      sol.Buffer0SurfacePitch = 4 * info->stride[0];
4368bf215546Sopenharmony_ci      sol.Buffer1SurfacePitch = 4 * info->stride[1];
4369bf215546Sopenharmony_ci      sol.Buffer2SurfacePitch = 4 * info->stride[2];
4370bf215546Sopenharmony_ci      sol.Buffer3SurfacePitch = 4 * info->stride[3];
4371bf215546Sopenharmony_ci#endif
4372bf215546Sopenharmony_ci   }
4373bf215546Sopenharmony_ci
4374bf215546Sopenharmony_ci   crocus_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) {
4375bf215546Sopenharmony_ci      list.DWordLength = 3 + 2 * max_decls - 2;
4376bf215546Sopenharmony_ci      list.StreamtoBufferSelects0 = buffer_mask[0];
4377bf215546Sopenharmony_ci      list.StreamtoBufferSelects1 = buffer_mask[1];
4378bf215546Sopenharmony_ci      list.StreamtoBufferSelects2 = buffer_mask[2];
4379bf215546Sopenharmony_ci      list.StreamtoBufferSelects3 = buffer_mask[3];
4380bf215546Sopenharmony_ci      list.NumEntries0 = decls[0];
4381bf215546Sopenharmony_ci      list.NumEntries1 = decls[1];
4382bf215546Sopenharmony_ci      list.NumEntries2 = decls[2];
4383bf215546Sopenharmony_ci      list.NumEntries3 = decls[3];
4384bf215546Sopenharmony_ci   }
4385bf215546Sopenharmony_ci
4386bf215546Sopenharmony_ci   for (int i = 0; i < max_decls; i++) {
4387bf215546Sopenharmony_ci      crocus_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 3 + i * 2, entry) {
4388bf215546Sopenharmony_ci         entry.Stream0Decl = so_decl[0][i];
4389bf215546Sopenharmony_ci         entry.Stream1Decl = so_decl[1][i];
4390bf215546Sopenharmony_ci         entry.Stream2Decl = so_decl[2][i];
4391bf215546Sopenharmony_ci         entry.Stream3Decl = so_decl[3][i];
4392bf215546Sopenharmony_ci      }
4393bf215546Sopenharmony_ci   }
4394bf215546Sopenharmony_ci
4395bf215546Sopenharmony_ci   return map;
4396bf215546Sopenharmony_ci}
4397bf215546Sopenharmony_ci#endif
4398bf215546Sopenharmony_ci
4399bf215546Sopenharmony_ci#if GFX_VER == 6
4400bf215546Sopenharmony_cistatic void
4401bf215546Sopenharmony_cicrocus_emit_so_svbi(struct crocus_context *ice)
4402bf215546Sopenharmony_ci{
4403bf215546Sopenharmony_ci   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
4404bf215546Sopenharmony_ci
4405bf215546Sopenharmony_ci   unsigned max_vertex = 0xffffffff;
4406bf215546Sopenharmony_ci   for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
4407bf215546Sopenharmony_ci      struct crocus_stream_output_target *tgt =
4408bf215546Sopenharmony_ci         (void *) ice->state.so_target[i];
4409bf215546Sopenharmony_ci      if (tgt)
4410bf215546Sopenharmony_ci         max_vertex = MIN2(max_vertex, tgt->base.buffer_size / tgt->stride);
4411bf215546Sopenharmony_ci   }
4412bf215546Sopenharmony_ci
4413bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_GS_SVB_INDEX), svbi) {
4414bf215546Sopenharmony_ci      svbi.IndexNumber = 0;
4415bf215546Sopenharmony_ci      svbi.StreamedVertexBufferIndex = (uint32_t)ice->state.svbi; /* fix when resuming, based on target's prim count */
4416bf215546Sopenharmony_ci      svbi.MaximumIndex = max_vertex;
4417bf215546Sopenharmony_ci   }
4418bf215546Sopenharmony_ci
4419bf215546Sopenharmony_ci   /* initialize the rest of the SVBI's to reasonable values so that we don't
4420bf215546Sopenharmony_ci    * run out of room writing the regular data.
4421bf215546Sopenharmony_ci    */
4422bf215546Sopenharmony_ci   for (int i = 1; i < 4; i++) {
4423bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_GS_SVB_INDEX), svbi) {
4424bf215546Sopenharmony_ci         svbi.IndexNumber = i;
4425bf215546Sopenharmony_ci         svbi.StreamedVertexBufferIndex = 0;
4426bf215546Sopenharmony_ci         svbi.MaximumIndex = 0xffffffff;
4427bf215546Sopenharmony_ci      }
4428bf215546Sopenharmony_ci   }
4429bf215546Sopenharmony_ci}
4430bf215546Sopenharmony_ci
4431bf215546Sopenharmony_ci#endif
4432bf215546Sopenharmony_ci
4433bf215546Sopenharmony_ci
4434bf215546Sopenharmony_ci#if GFX_VER >= 6
4435bf215546Sopenharmony_cistatic bool
4436bf215546Sopenharmony_cicrocus_is_drawing_points(const struct crocus_context *ice)
4437bf215546Sopenharmony_ci{
4438bf215546Sopenharmony_ci   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
4439bf215546Sopenharmony_ci
4440bf215546Sopenharmony_ci   if (cso_rast->cso.fill_front == PIPE_POLYGON_MODE_POINT ||
4441bf215546Sopenharmony_ci       cso_rast->cso.fill_back == PIPE_POLYGON_MODE_POINT)
4442bf215546Sopenharmony_ci      return true;
4443bf215546Sopenharmony_ci
4444bf215546Sopenharmony_ci   if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
4445bf215546Sopenharmony_ci      const struct brw_gs_prog_data *gs_prog_data =
4446bf215546Sopenharmony_ci         (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
4447bf215546Sopenharmony_ci      return gs_prog_data->output_topology == _3DPRIM_POINTLIST;
4448bf215546Sopenharmony_ci   } else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
4449bf215546Sopenharmony_ci      const struct brw_tes_prog_data *tes_data =
4450bf215546Sopenharmony_ci         (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
4451bf215546Sopenharmony_ci      return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
4452bf215546Sopenharmony_ci   } else {
4453bf215546Sopenharmony_ci      return ice->state.prim_mode == PIPE_PRIM_POINTS;
4454bf215546Sopenharmony_ci   }
4455bf215546Sopenharmony_ci}
4456bf215546Sopenharmony_ci#endif
4457bf215546Sopenharmony_ci
4458bf215546Sopenharmony_ci#if GFX_VER >= 6
4459bf215546Sopenharmony_cistatic void
4460bf215546Sopenharmony_ciget_attr_override(
4461bf215546Sopenharmony_ci   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
4462bf215546Sopenharmony_ci   const struct brw_vue_map *vue_map,
4463bf215546Sopenharmony_ci   int urb_entry_read_offset, int fs_attr,
4464bf215546Sopenharmony_ci   bool two_side_color, uint32_t *max_source_attr)
4465bf215546Sopenharmony_ci{
4466bf215546Sopenharmony_ci   /* Find the VUE slot for this attribute. */
4467bf215546Sopenharmony_ci   int slot = vue_map->varying_to_slot[fs_attr];
4468bf215546Sopenharmony_ci
4469bf215546Sopenharmony_ci   /* Viewport and Layer are stored in the VUE header.  We need to override
4470bf215546Sopenharmony_ci    * them to zero if earlier stages didn't write them, as GL requires that
4471bf215546Sopenharmony_ci    * they read back as zero when not explicitly set.
4472bf215546Sopenharmony_ci    */
4473bf215546Sopenharmony_ci   if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
4474bf215546Sopenharmony_ci      attr->ComponentOverrideX = true;
4475bf215546Sopenharmony_ci      attr->ComponentOverrideW = true;
4476bf215546Sopenharmony_ci      attr->ConstantSource = CONST_0000;
4477bf215546Sopenharmony_ci
4478bf215546Sopenharmony_ci      if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
4479bf215546Sopenharmony_ci         attr->ComponentOverrideY = true;
4480bf215546Sopenharmony_ci      if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
4481bf215546Sopenharmony_ci         attr->ComponentOverrideZ = true;
4482bf215546Sopenharmony_ci
4483bf215546Sopenharmony_ci      return;
4484bf215546Sopenharmony_ci   }
4485bf215546Sopenharmony_ci
4486bf215546Sopenharmony_ci   /* If there was only a back color written but not front, use back
4487bf215546Sopenharmony_ci    * as the color instead of undefined
4488bf215546Sopenharmony_ci    */
4489bf215546Sopenharmony_ci   if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
4490bf215546Sopenharmony_ci      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
4491bf215546Sopenharmony_ci   if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
4492bf215546Sopenharmony_ci      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
4493bf215546Sopenharmony_ci
4494bf215546Sopenharmony_ci   if (slot == -1) {
4495bf215546Sopenharmony_ci      /* This attribute does not exist in the VUE--that means that the vertex
4496bf215546Sopenharmony_ci       * shader did not write to it.  This means that either:
4497bf215546Sopenharmony_ci       *
4498bf215546Sopenharmony_ci       * (a) This attribute is a texture coordinate, and it is going to be
4499bf215546Sopenharmony_ci       * replaced with point coordinates (as a consequence of a call to
4500bf215546Sopenharmony_ci       * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
4501bf215546Sopenharmony_ci       * hardware will ignore whatever attribute override we supply.
4502bf215546Sopenharmony_ci       *
4503bf215546Sopenharmony_ci       * (b) This attribute is read by the fragment shader but not written by
4504bf215546Sopenharmony_ci       * the vertex shader, so its value is undefined.  Therefore the
4505bf215546Sopenharmony_ci       * attribute override we supply doesn't matter.
4506bf215546Sopenharmony_ci       *
4507bf215546Sopenharmony_ci       * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
4508bf215546Sopenharmony_ci       * previous shader stage.
4509bf215546Sopenharmony_ci       *
4510bf215546Sopenharmony_ci       * Note that we don't have to worry about the cases where the attribute
4511bf215546Sopenharmony_ci       * is gl_PointCoord or is undergoing point sprite coordinate
4512bf215546Sopenharmony_ci       * replacement, because in those cases, this function isn't called.
4513bf215546Sopenharmony_ci       *
4514bf215546Sopenharmony_ci       * In case (c), we need to program the attribute overrides so that the
4515bf215546Sopenharmony_ci       * primitive ID will be stored in this slot.  In every other case, the
4516bf215546Sopenharmony_ci       * attribute override we supply doesn't matter.  So just go ahead and
4517bf215546Sopenharmony_ci       * program primitive ID in every case.
4518bf215546Sopenharmony_ci       */
4519bf215546Sopenharmony_ci      attr->ComponentOverrideW = true;
4520bf215546Sopenharmony_ci      attr->ComponentOverrideX = true;
4521bf215546Sopenharmony_ci      attr->ComponentOverrideY = true;
4522bf215546Sopenharmony_ci      attr->ComponentOverrideZ = true;
4523bf215546Sopenharmony_ci      attr->ConstantSource = PRIM_ID;
4524bf215546Sopenharmony_ci      return;
4525bf215546Sopenharmony_ci   }
4526bf215546Sopenharmony_ci
4527bf215546Sopenharmony_ci   /* Compute the location of the attribute relative to urb_entry_read_offset.
4528bf215546Sopenharmony_ci    * Each increment of urb_entry_read_offset represents a 256-bit value, so
4529bf215546Sopenharmony_ci    * it counts for two 128-bit VUE slots.
4530bf215546Sopenharmony_ci    */
4531bf215546Sopenharmony_ci   int source_attr = slot - 2 * urb_entry_read_offset;
4532bf215546Sopenharmony_ci   assert(source_attr >= 0 && source_attr < 32);
4533bf215546Sopenharmony_ci
4534bf215546Sopenharmony_ci   /* If we are doing two-sided color, and the VUE slot following this one
4535bf215546Sopenharmony_ci    * represents a back-facing color, then we need to instruct the SF unit to
4536bf215546Sopenharmony_ci    * do back-facing swizzling.
4537bf215546Sopenharmony_ci    */
4538bf215546Sopenharmony_ci   bool swizzling = two_side_color &&
4539bf215546Sopenharmony_ci      ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
4540bf215546Sopenharmony_ci        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
4541bf215546Sopenharmony_ci       (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
4542bf215546Sopenharmony_ci        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
4543bf215546Sopenharmony_ci
4544bf215546Sopenharmony_ci   /* Update max_source_attr.  If swizzling, the SF will read this slot + 1. */
4545bf215546Sopenharmony_ci   if (*max_source_attr < source_attr + swizzling)
4546bf215546Sopenharmony_ci      *max_source_attr = source_attr + swizzling;
4547bf215546Sopenharmony_ci
4548bf215546Sopenharmony_ci   attr->SourceAttribute = source_attr;
4549bf215546Sopenharmony_ci   if (swizzling)
4550bf215546Sopenharmony_ci      attr->SwizzleSelect = INPUTATTR_FACING;
4551bf215546Sopenharmony_ci}
4552bf215546Sopenharmony_ci
4553bf215546Sopenharmony_cistatic void
4554bf215546Sopenharmony_cicalculate_attr_overrides(
4555bf215546Sopenharmony_ci   const struct crocus_context *ice,
4556bf215546Sopenharmony_ci   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
4557bf215546Sopenharmony_ci   uint32_t *point_sprite_enables,
4558bf215546Sopenharmony_ci   uint32_t *urb_entry_read_length,
4559bf215546Sopenharmony_ci   uint32_t *urb_entry_read_offset)
4560bf215546Sopenharmony_ci{
4561bf215546Sopenharmony_ci   const struct brw_wm_prog_data *wm_prog_data = (void *)
4562bf215546Sopenharmony_ci      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
4563bf215546Sopenharmony_ci   const struct brw_vue_map *vue_map = ice->shaders.last_vue_map;
4564bf215546Sopenharmony_ci   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
4565bf215546Sopenharmony_ci   uint32_t max_source_attr = 0;
4566bf215546Sopenharmony_ci   const struct shader_info *fs_info =
4567bf215546Sopenharmony_ci      crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
4568bf215546Sopenharmony_ci
4569bf215546Sopenharmony_ci   int first_slot =
4570bf215546Sopenharmony_ci      brw_compute_first_urb_slot_required(fs_info->inputs_read, vue_map);
4571bf215546Sopenharmony_ci
4572bf215546Sopenharmony_ci   /* Each URB offset packs two varying slots */
4573bf215546Sopenharmony_ci   assert(first_slot % 2 == 0);
4574bf215546Sopenharmony_ci   *urb_entry_read_offset = first_slot / 2;
4575bf215546Sopenharmony_ci   *point_sprite_enables = 0;
4576bf215546Sopenharmony_ci
4577bf215546Sopenharmony_ci   for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) {
4578bf215546Sopenharmony_ci      const int input_index = wm_prog_data->urb_setup[fs_attr];
4579bf215546Sopenharmony_ci
4580bf215546Sopenharmony_ci      if (input_index < 0)
4581bf215546Sopenharmony_ci         continue;
4582bf215546Sopenharmony_ci
4583bf215546Sopenharmony_ci      bool point_sprite = false;
4584bf215546Sopenharmony_ci      if (crocus_is_drawing_points(ice)) {
4585bf215546Sopenharmony_ci         if (fs_attr >= VARYING_SLOT_TEX0 &&
4586bf215546Sopenharmony_ci             fs_attr <= VARYING_SLOT_TEX7 &&
4587bf215546Sopenharmony_ci             cso_rast->cso.sprite_coord_enable & (1 << (fs_attr - VARYING_SLOT_TEX0)))
4588bf215546Sopenharmony_ci            point_sprite = true;
4589bf215546Sopenharmony_ci
4590bf215546Sopenharmony_ci         if (fs_attr == VARYING_SLOT_PNTC)
4591bf215546Sopenharmony_ci            point_sprite = true;
4592bf215546Sopenharmony_ci
4593bf215546Sopenharmony_ci         if (point_sprite)
4594bf215546Sopenharmony_ci            *point_sprite_enables |= 1U << input_index;
4595bf215546Sopenharmony_ci      }
4596bf215546Sopenharmony_ci
4597bf215546Sopenharmony_ci      struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };
4598bf215546Sopenharmony_ci      if (!point_sprite) {
4599bf215546Sopenharmony_ci         get_attr_override(&attribute, vue_map, *urb_entry_read_offset, fs_attr,
4600bf215546Sopenharmony_ci                           cso_rast->cso.light_twoside, &max_source_attr);
4601bf215546Sopenharmony_ci      }
4602bf215546Sopenharmony_ci
      /* The hardware can only apply overrides to 16 attributes at a
       * time; the remaining attributes (up to 16 more) have to be lined
       * up so that the input index equals the output index.  We'll need
       * to do some tweaking to make sure that's the case.
       */
4608bf215546Sopenharmony_ci      if (input_index < 16)
4609bf215546Sopenharmony_ci         attr_overrides[input_index] = attribute;
4610bf215546Sopenharmony_ci      else
4611bf215546Sopenharmony_ci         assert(attribute.SourceAttribute == input_index);
4612bf215546Sopenharmony_ci   }
4613bf215546Sopenharmony_ci
4614bf215546Sopenharmony_ci   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
4615bf215546Sopenharmony_ci    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
4616bf215546Sopenharmony_ci    *
4617bf215546Sopenharmony_ci    * "This field should be set to the minimum length required to read the
4618bf215546Sopenharmony_ci    *  maximum source attribute.  The maximum source attribute is indicated
4619bf215546Sopenharmony_ci    *  by the maximum value of the enabled Attribute # Source Attribute if
4620bf215546Sopenharmony_ci    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
4621bf215546Sopenharmony_ci    *  enable is not set.
4622bf215546Sopenharmony_ci    *  read_length = ceiling((max_source_attr + 1) / 2)
4623bf215546Sopenharmony_ci    *
4624bf215546Sopenharmony_ci    *  [errata] Corruption/Hang possible if length programmed larger than
4625bf215546Sopenharmony_ci    *  recommended"
4626bf215546Sopenharmony_ci    *
4627bf215546Sopenharmony_ci    * Similar text exists for Ivy Bridge.
4628bf215546Sopenharmony_ci    */
4629bf215546Sopenharmony_ci   *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
4630bf215546Sopenharmony_ci}
4631bf215546Sopenharmony_ci#endif
4632bf215546Sopenharmony_ci
#if GFX_VER >= 7
/**
 * Emit 3DSTATE_SBE (and, on Gfx8+, 3DSTATE_SBE_SWIZ) for the currently
 * bound rasterizer state and fragment shader.
 *
 * The attribute override table, the URB entry read offset/length and the
 * point sprite enable mask are all produced by calculate_attr_overrides().
 * On Gfx7 the overrides are written directly into the 3DSTATE_SBE packet
 * (attr_overrides is an alias for sbe.Attribute); on Gfx8+ they are staged
 * in a local array and emitted afterwards through 3DSTATE_SBE_SWIZ.
 *
 * @param batch  batch the packets are emitted into
 * @param ice    context providing the rasterizer CSO and the FS prog_data
 */
static void
crocus_emit_sbe(struct crocus_batch *batch, const struct crocus_context *ice)
{
   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
   const struct brw_wm_prog_data *wm_prog_data = (void *)
      ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
#if GFX_VER >= 8
   struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } };
#else
   /* Only valid inside the 3DSTATE_SBE emit block below, where `sbe` is in
    * scope.  Undefined again right after that block.
    */
#define attr_overrides sbe.Attribute
#endif

   /* Outputs of calculate_attr_overrides(). */
   uint32_t urb_entry_read_length;
   uint32_t urb_entry_read_offset;
   uint32_t point_sprite_enables;

   crocus_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.AttributeSwizzleEnable = true;
      sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
      sbe.PointSpriteTextureCoordinateOrigin = cso_rast->cso.sprite_coord_mode;

      calculate_attr_overrides(ice,
                               attr_overrides,
                               &point_sprite_enables,
                               &urb_entry_read_length,
                               &urb_entry_read_offset);
      sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
      sbe.VertexURBEntryReadLength = urb_entry_read_length;
      sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
      sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables;
#if GFX_VER >= 8
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#endif
   }
#if GFX_VER >= 8
   crocus_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) {
      for (int i = 0; i < 16; i++)
         sbes.Attribute[i] = attr_overrides[i];
   }
#else
   /* Do not let the packet-field alias leak into the rest of the file. */
#undef attr_overrides
#endif
}
#endif
4677bf215546Sopenharmony_ci
4678bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */
4679bf215546Sopenharmony_ci
4680bf215546Sopenharmony_ci/**
4681bf215546Sopenharmony_ci * Populate VS program key fields based on the current state.
4682bf215546Sopenharmony_ci */
4683bf215546Sopenharmony_cistatic void
4684bf215546Sopenharmony_cicrocus_populate_vs_key(const struct crocus_context *ice,
4685bf215546Sopenharmony_ci                       const struct shader_info *info,
4686bf215546Sopenharmony_ci                       gl_shader_stage last_stage,
4687bf215546Sopenharmony_ci                       struct brw_vs_prog_key *key)
4688bf215546Sopenharmony_ci{
4689bf215546Sopenharmony_ci   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
4690bf215546Sopenharmony_ci
4691bf215546Sopenharmony_ci   if (info->clip_distance_array_size == 0 &&
4692bf215546Sopenharmony_ci       (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4693bf215546Sopenharmony_ci       last_stage == MESA_SHADER_VERTEX)
4694bf215546Sopenharmony_ci      key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4695bf215546Sopenharmony_ci
4696bf215546Sopenharmony_ci   if (last_stage == MESA_SHADER_VERTEX &&
4697bf215546Sopenharmony_ci       info->outputs_written & (VARYING_BIT_PSIZ))
4698bf215546Sopenharmony_ci      key->clamp_pointsize = 1;
4699bf215546Sopenharmony_ci
4700bf215546Sopenharmony_ci#if GFX_VER <= 5
4701bf215546Sopenharmony_ci   key->copy_edgeflag = (cso_rast->cso.fill_back != PIPE_POLYGON_MODE_FILL ||
4702bf215546Sopenharmony_ci                         cso_rast->cso.fill_front != PIPE_POLYGON_MODE_FILL);
4703bf215546Sopenharmony_ci   key->point_coord_replace = cso_rast->cso.sprite_coord_enable & 0xff;
4704bf215546Sopenharmony_ci#endif
4705bf215546Sopenharmony_ci
4706bf215546Sopenharmony_ci   key->clamp_vertex_color = cso_rast->cso.clamp_vertex_color;
4707bf215546Sopenharmony_ci
4708bf215546Sopenharmony_ci#if GFX_VERx10 < 75
4709bf215546Sopenharmony_ci   uint64_t inputs_read = info->inputs_read;
4710bf215546Sopenharmony_ci   int ve_idx = 0;
4711bf215546Sopenharmony_ci   while (inputs_read) {
4712bf215546Sopenharmony_ci      int i = u_bit_scan64(&inputs_read);
4713bf215546Sopenharmony_ci      key->gl_attrib_wa_flags[i] = ice->state.cso_vertex_elements->wa_flags[ve_idx];
4714bf215546Sopenharmony_ci      ve_idx++;
4715bf215546Sopenharmony_ci   }
4716bf215546Sopenharmony_ci#endif
4717bf215546Sopenharmony_ci}
4718bf215546Sopenharmony_ci
/**
 * Fill in the state-dependent fields of a tessellation control shader
 * program key.
 *
 * Currently a no-op: nothing is read from the context and the key is left
 * untouched.
 */
static void
crocus_populate_tcs_key(const struct crocus_context *ice,
                        struct brw_tcs_prog_key *key)
{
   /* No TCS key fields are derived from context state here. */
}
4727bf215546Sopenharmony_ci
4728bf215546Sopenharmony_ci/**
4729bf215546Sopenharmony_ci * Populate TES program key fields based on the current state.
4730bf215546Sopenharmony_ci */
4731bf215546Sopenharmony_cistatic void
4732bf215546Sopenharmony_cicrocus_populate_tes_key(const struct crocus_context *ice,
4733bf215546Sopenharmony_ci                        const struct shader_info *info,
4734bf215546Sopenharmony_ci                        gl_shader_stage last_stage,
4735bf215546Sopenharmony_ci                        struct brw_tes_prog_key *key)
4736bf215546Sopenharmony_ci{
4737bf215546Sopenharmony_ci   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
4738bf215546Sopenharmony_ci
4739bf215546Sopenharmony_ci   if (info->clip_distance_array_size == 0 &&
4740bf215546Sopenharmony_ci       (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4741bf215546Sopenharmony_ci       last_stage == MESA_SHADER_TESS_EVAL)
4742bf215546Sopenharmony_ci      key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4743bf215546Sopenharmony_ci
4744bf215546Sopenharmony_ci   if (last_stage == MESA_SHADER_TESS_EVAL &&
4745bf215546Sopenharmony_ci       info->outputs_written & (VARYING_BIT_PSIZ))
4746bf215546Sopenharmony_ci      key->clamp_pointsize = 1;
4747bf215546Sopenharmony_ci}
4748bf215546Sopenharmony_ci
4749bf215546Sopenharmony_ci/**
4750bf215546Sopenharmony_ci * Populate GS program key fields based on the current state.
4751bf215546Sopenharmony_ci */
4752bf215546Sopenharmony_cistatic void
4753bf215546Sopenharmony_cicrocus_populate_gs_key(const struct crocus_context *ice,
4754bf215546Sopenharmony_ci                       const struct shader_info *info,
4755bf215546Sopenharmony_ci                       gl_shader_stage last_stage,
4756bf215546Sopenharmony_ci                       struct brw_gs_prog_key *key)
4757bf215546Sopenharmony_ci{
4758bf215546Sopenharmony_ci   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
4759bf215546Sopenharmony_ci
4760bf215546Sopenharmony_ci   if (info->clip_distance_array_size == 0 &&
4761bf215546Sopenharmony_ci       (info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&
4762bf215546Sopenharmony_ci       last_stage == MESA_SHADER_GEOMETRY)
4763bf215546Sopenharmony_ci      key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;
4764bf215546Sopenharmony_ci
4765bf215546Sopenharmony_ci   if (last_stage == MESA_SHADER_GEOMETRY &&
4766bf215546Sopenharmony_ci       info->outputs_written & (VARYING_BIT_PSIZ))
4767bf215546Sopenharmony_ci      key->clamp_pointsize = 1;
4768bf215546Sopenharmony_ci}
4769bf215546Sopenharmony_ci
4770bf215546Sopenharmony_ci/**
4771bf215546Sopenharmony_ci * Populate FS program key fields based on the current state.
4772bf215546Sopenharmony_ci */
4773bf215546Sopenharmony_cistatic void
4774bf215546Sopenharmony_cicrocus_populate_fs_key(const struct crocus_context *ice,
4775bf215546Sopenharmony_ci                       const struct shader_info *info,
4776bf215546Sopenharmony_ci                       struct brw_wm_prog_key *key)
4777bf215546Sopenharmony_ci{
4778bf215546Sopenharmony_ci   struct crocus_screen *screen = (void *) ice->ctx.screen;
4779bf215546Sopenharmony_ci   const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
4780bf215546Sopenharmony_ci   const struct crocus_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;
4781bf215546Sopenharmony_ci   const struct crocus_rasterizer_state *rast = ice->state.cso_rast;
4782bf215546Sopenharmony_ci   const struct crocus_blend_state *blend = ice->state.cso_blend;
4783bf215546Sopenharmony_ci
4784bf215546Sopenharmony_ci#if GFX_VER < 6
4785bf215546Sopenharmony_ci   uint32_t lookup = 0;
4786bf215546Sopenharmony_ci
4787bf215546Sopenharmony_ci   if (info->fs.uses_discard || zsa->cso.alpha_enabled)
4788bf215546Sopenharmony_ci      lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;
4789bf215546Sopenharmony_ci
4790bf215546Sopenharmony_ci   if (info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
4791bf215546Sopenharmony_ci      lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;
4792bf215546Sopenharmony_ci
4793bf215546Sopenharmony_ci   if (fb->zsbuf && zsa->cso.depth_enabled) {
4794bf215546Sopenharmony_ci      lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;
4795bf215546Sopenharmony_ci
4796bf215546Sopenharmony_ci      if (zsa->cso.depth_writemask)
4797bf215546Sopenharmony_ci         lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;
4798bf215546Sopenharmony_ci
4799bf215546Sopenharmony_ci   }
4800bf215546Sopenharmony_ci   if (zsa->cso.stencil[0].enabled || zsa->cso.stencil[1].enabled) {
4801bf215546Sopenharmony_ci      lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT;
4802bf215546Sopenharmony_ci      if (zsa->cso.stencil[0].writemask || zsa->cso.stencil[1].writemask)
4803bf215546Sopenharmony_ci         lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT;
4804bf215546Sopenharmony_ci   }
4805bf215546Sopenharmony_ci   key->iz_lookup = lookup;
4806bf215546Sopenharmony_ci   key->stats_wm = ice->state.stats_wm;
4807bf215546Sopenharmony_ci#endif
4808bf215546Sopenharmony_ci
4809bf215546Sopenharmony_ci   uint32_t line_aa = BRW_WM_AA_NEVER;
4810bf215546Sopenharmony_ci   if (rast->cso.line_smooth) {
4811bf215546Sopenharmony_ci      int reduced_prim = ice->state.reduced_prim_mode;
4812bf215546Sopenharmony_ci      if (reduced_prim == PIPE_PRIM_LINES)
4813bf215546Sopenharmony_ci         line_aa = BRW_WM_AA_ALWAYS;
4814bf215546Sopenharmony_ci      else if (reduced_prim == PIPE_PRIM_TRIANGLES) {
4815bf215546Sopenharmony_ci         if (rast->cso.fill_front == PIPE_POLYGON_MODE_LINE) {
4816bf215546Sopenharmony_ci            line_aa = BRW_WM_AA_SOMETIMES;
4817bf215546Sopenharmony_ci
4818bf215546Sopenharmony_ci            if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE ||
4819bf215546Sopenharmony_ci                rast->cso.cull_face == PIPE_FACE_BACK)
4820bf215546Sopenharmony_ci               line_aa = BRW_WM_AA_ALWAYS;
4821bf215546Sopenharmony_ci         } else if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE) {
4822bf215546Sopenharmony_ci            line_aa = BRW_WM_AA_SOMETIMES;
4823bf215546Sopenharmony_ci
4824bf215546Sopenharmony_ci            if (rast->cso.cull_face == PIPE_FACE_FRONT)
4825bf215546Sopenharmony_ci               line_aa = BRW_WM_AA_ALWAYS;
4826bf215546Sopenharmony_ci         }
4827bf215546Sopenharmony_ci      }
4828bf215546Sopenharmony_ci   }
4829bf215546Sopenharmony_ci   key->line_aa = line_aa;
4830bf215546Sopenharmony_ci
4831bf215546Sopenharmony_ci   key->nr_color_regions = fb->nr_cbufs;
4832bf215546Sopenharmony_ci
4833bf215546Sopenharmony_ci   key->clamp_fragment_color = rast->cso.clamp_fragment_color;
4834bf215546Sopenharmony_ci
4835bf215546Sopenharmony_ci   key->alpha_to_coverage = blend->cso.alpha_to_coverage;
4836bf215546Sopenharmony_ci
4837bf215546Sopenharmony_ci   key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->cso.alpha_enabled;
4838bf215546Sopenharmony_ci
4839bf215546Sopenharmony_ci   key->flat_shade = rast->cso.flatshade &&
4840bf215546Sopenharmony_ci      (info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1));
4841bf215546Sopenharmony_ci
4842bf215546Sopenharmony_ci   key->persample_interp = rast->cso.force_persample_interp;
4843bf215546Sopenharmony_ci   key->multisample_fbo = rast->cso.multisample && fb->samples > 1;
4844bf215546Sopenharmony_ci
4845bf215546Sopenharmony_ci   key->ignore_sample_mask_out = !key->multisample_fbo;
4846bf215546Sopenharmony_ci   key->coherent_fb_fetch = false; // TODO: needed?
4847bf215546Sopenharmony_ci
4848bf215546Sopenharmony_ci   key->force_dual_color_blend =
4849bf215546Sopenharmony_ci      screen->driconf.dual_color_blend_by_location &&
4850bf215546Sopenharmony_ci      (blend->blend_enables & 1) && blend->dual_color_blending;
4851bf215546Sopenharmony_ci
4852bf215546Sopenharmony_ci#if GFX_VER <= 5
4853bf215546Sopenharmony_ci   if (fb->nr_cbufs > 1 && zsa->cso.alpha_enabled) {
4854bf215546Sopenharmony_ci      key->emit_alpha_test = true;
4855bf215546Sopenharmony_ci      key->alpha_test_func = zsa->cso.alpha_func;
4856bf215546Sopenharmony_ci      key->alpha_test_ref = zsa->cso.alpha_ref_value;
4857bf215546Sopenharmony_ci   }
4858bf215546Sopenharmony_ci#endif
4859bf215546Sopenharmony_ci}
4860bf215546Sopenharmony_ci
/**
 * Fill in the state-dependent fields of a compute shader program key.
 *
 * Currently a no-op: nothing is read from the context and the key is left
 * untouched.
 */
static void
crocus_populate_cs_key(const struct crocus_context *ice,
                       struct brw_cs_prog_key *key)
{
   /* No CS key fields are derived from context state here. */
}
4866bf215546Sopenharmony_ci
/**
 * Kernel start pointer for a compiled shader.
 *
 * On Gfx4 this expands to ro_bo() on the shader cache BO at the shader's
 * offset (presumably a read-only relocated address - confirm against
 * ro_bo()); on Gfx5+ it is just the shader's offset.
 *
 * The macro form deliberately has no trailing semicolon so that it can be
 * used in any expression context, exactly like the function form.
 */
#if GFX_VER == 4
#define KSP(ice, shader) ro_bo((ice)->shaders.cache_bo, (shader)->offset)
#elif GFX_VER >= 5
static uint64_t
KSP(const struct crocus_context *ice, const struct crocus_compiled_shader *shader)
{
   return shader->offset;
}
#endif
4876bf215546Sopenharmony_ci
/* Gen11 workaround table #2056 WABTPPrefetchDisable suggests disabling
 * prefetching of binding tables on A0 and B0 steppings.  XXX: Revisit
 * this WA on C0 stepping.
 *
 * TODO: Fill out SamplerCount for prefetching?
 */

/**
 * Fill in the fields shared by the per-stage thread dispatch packets.
 *
 * Expects `ice`, `shader`, `prog_data` and `vue_prog_data` to be in scope at
 * the expansion site.
 *
 * @param pkt     packet structure being filled in
 * @param prefix  token pasted in front of URBEntryReadLength and
 *                URBEntryReadOffset to form the packet's field names
 * @param stage   stage forwarded to crocus_get_scratch_space()
 */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage)                 \
   pkt.Enable           = true;                                         \
   pkt.StatisticsEnable = true;                                         \
                                                                        \
   pkt.KernelStartPointer = KSP(ice, shader);                           \
   pkt.BindingTableEntryCount = shader->bt.size_bytes / 4;              \
   pkt.FloatingPointMode = prog_data->use_alt_mode;                     \
                                                                        \
   pkt.DispatchGRFStartRegisterForURBData =                             \
      prog_data->dispatch_grf_start_reg;                                \
   pkt.prefix##URBEntryReadOffset = 0;                                  \
   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;     \
                                                                        \
   /* Scratch is only programmed when the shader asks for some. */      \
   if (prog_data->total_scratch) {                                      \
      struct crocus_bo *scratch_bo =                                    \
         crocus_get_scratch_space(ice, prog_data->total_scratch, stage); \
      pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;   \
      pkt.ScratchSpaceBasePointer = rw_bo(scratch_bo, 0);               \
   }
4903bf215546Sopenharmony_ci
4904bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */
#if GFX_VER >= 6
/* Per-stage push constant opcode, indexed by shader stage.  Compute has no
 * entry of its own and is left at 0.
 */
static const uint32_t push_constant_opcodes[] = {
   [MESA_SHADER_VERTEX]    = 21,
   [MESA_SHADER_GEOMETRY]  = 22,
   [MESA_SHADER_FRAGMENT]  = 23,
   [MESA_SHADER_TESS_CTRL] = 25, /* HS */
   [MESA_SHADER_TESS_EVAL] = 26, /* DS */
   [MESA_SHADER_COMPUTE]   = 0,
};
#endif
4915bf215546Sopenharmony_ci
4916bf215546Sopenharmony_cistatic void
4917bf215546Sopenharmony_ciemit_sized_null_surface(struct crocus_batch *batch,
4918bf215546Sopenharmony_ci                        unsigned width, unsigned height,
4919bf215546Sopenharmony_ci                        unsigned layers, unsigned levels,
4920bf215546Sopenharmony_ci                        unsigned minimum_array_element,
4921bf215546Sopenharmony_ci                        uint32_t *out_offset)
4922bf215546Sopenharmony_ci{
4923bf215546Sopenharmony_ci   struct isl_device *isl_dev = &batch->screen->isl_dev;
4924bf215546Sopenharmony_ci   uint32_t *surf = stream_state(batch, isl_dev->ss.size,
4925bf215546Sopenharmony_ci                                 isl_dev->ss.align,
4926bf215546Sopenharmony_ci                                 out_offset);
4927bf215546Sopenharmony_ci   //TODO gen 6 multisample crash
4928bf215546Sopenharmony_ci   isl_null_fill_state(isl_dev, surf,
4929bf215546Sopenharmony_ci                       .size = isl_extent3d(width, height, layers),
4930bf215546Sopenharmony_ci                       .levels = levels,
4931bf215546Sopenharmony_ci                       .minimum_array_element = minimum_array_element);
4932bf215546Sopenharmony_ci}
/**
 * Emit a 1x1x1 null surface with level and minimum array element 0.
 *
 * @param batch       batch whose state stream is used
 * @param out_offset  receives the offset of the emitted surface state
 */
static void
emit_null_surface(struct crocus_batch *batch,
                  uint32_t *out_offset)
{
   const unsigned width = 1, height = 1, layers = 1;

   emit_sized_null_surface(batch, width, height, layers, 0, 0, out_offset);
}
4939bf215546Sopenharmony_ci
4940bf215546Sopenharmony_cistatic void
4941bf215546Sopenharmony_ciemit_null_fb_surface(struct crocus_batch *batch,
4942bf215546Sopenharmony_ci                     struct crocus_context *ice,
4943bf215546Sopenharmony_ci                     uint32_t *out_offset)
4944bf215546Sopenharmony_ci{
4945bf215546Sopenharmony_ci   uint32_t width, height, layers, level, layer;
4946bf215546Sopenharmony_ci   /* If set_framebuffer_state() was never called, fall back to 1x1x1 */
4947bf215546Sopenharmony_ci   if (ice->state.framebuffer.width == 0 && ice->state.framebuffer.height == 0) {
4948bf215546Sopenharmony_ci      emit_null_surface(batch, out_offset);
4949bf215546Sopenharmony_ci      return;
4950bf215546Sopenharmony_ci   }
4951bf215546Sopenharmony_ci
4952bf215546Sopenharmony_ci   struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
4953bf215546Sopenharmony_ci   width = MAX2(cso->width, 1);
4954bf215546Sopenharmony_ci   height = MAX2(cso->height, 1);
4955bf215546Sopenharmony_ci   layers = cso->layers ? cso->layers : 1;
4956bf215546Sopenharmony_ci   level = 0;
4957bf215546Sopenharmony_ci   layer = 0;
4958bf215546Sopenharmony_ci
4959bf215546Sopenharmony_ci   if (cso->nr_cbufs == 0 && cso->zsbuf) {
4960bf215546Sopenharmony_ci      width = cso->zsbuf->width;
4961bf215546Sopenharmony_ci      height = cso->zsbuf->height;
4962bf215546Sopenharmony_ci      level = cso->zsbuf->u.tex.level;
4963bf215546Sopenharmony_ci      layer = cso->zsbuf->u.tex.first_layer;
4964bf215546Sopenharmony_ci   }
4965bf215546Sopenharmony_ci   emit_sized_null_surface(batch, width, height,
4966bf215546Sopenharmony_ci                           layers, level, layer,
4967bf215546Sopenharmony_ci                           out_offset);
4968bf215546Sopenharmony_ci}
4969bf215546Sopenharmony_ci
4970bf215546Sopenharmony_cistatic void
4971bf215546Sopenharmony_ciemit_surface_state(struct crocus_batch *batch,
4972bf215546Sopenharmony_ci                   struct crocus_resource *res,
4973bf215546Sopenharmony_ci                   const struct isl_surf *in_surf,
4974bf215546Sopenharmony_ci                   bool adjust_surf,
4975bf215546Sopenharmony_ci                   struct isl_view *in_view,
4976bf215546Sopenharmony_ci                   bool writeable,
4977bf215546Sopenharmony_ci                   enum isl_aux_usage aux_usage,
4978bf215546Sopenharmony_ci                   bool blend_enable,
4979bf215546Sopenharmony_ci                   uint32_t write_disables,
4980bf215546Sopenharmony_ci                   uint32_t *surf_state,
4981bf215546Sopenharmony_ci                   uint32_t addr_offset)
4982bf215546Sopenharmony_ci{
4983bf215546Sopenharmony_ci   struct isl_device *isl_dev = &batch->screen->isl_dev;
4984bf215546Sopenharmony_ci   uint32_t reloc = RELOC_32BIT;
4985bf215546Sopenharmony_ci   uint64_t offset_B = res->offset;
4986bf215546Sopenharmony_ci   uint32_t tile_x_sa = 0, tile_y_sa = 0;
4987bf215546Sopenharmony_ci
4988bf215546Sopenharmony_ci   if (writeable)
4989bf215546Sopenharmony_ci      reloc |= RELOC_WRITE;
4990bf215546Sopenharmony_ci
4991bf215546Sopenharmony_ci   struct isl_surf surf = *in_surf;
4992bf215546Sopenharmony_ci   struct isl_view view = *in_view;
4993bf215546Sopenharmony_ci   if (adjust_surf) {
4994bf215546Sopenharmony_ci      if (res->base.b.target == PIPE_TEXTURE_3D && view.array_len == 1) {
4995bf215546Sopenharmony_ci         isl_surf_get_image_surf(isl_dev, in_surf,
4996bf215546Sopenharmony_ci                                 view.base_level, 0,
4997bf215546Sopenharmony_ci                                 view.base_array_layer,
4998bf215546Sopenharmony_ci                                 &surf, &offset_B,
4999bf215546Sopenharmony_ci                                 &tile_x_sa, &tile_y_sa);
5000bf215546Sopenharmony_ci         view.base_array_layer = 0;
5001bf215546Sopenharmony_ci         view.base_level = 0;
5002bf215546Sopenharmony_ci      } else if (res->base.b.target == PIPE_TEXTURE_CUBE && GFX_VER == 4) {
5003bf215546Sopenharmony_ci         isl_surf_get_image_surf(isl_dev, in_surf,
5004bf215546Sopenharmony_ci                                 view.base_level, view.base_array_layer,
5005bf215546Sopenharmony_ci                                 0,
5006bf215546Sopenharmony_ci                                 &surf, &offset_B,
5007bf215546Sopenharmony_ci                                 &tile_x_sa, &tile_y_sa);
5008bf215546Sopenharmony_ci         view.base_array_layer = 0;
5009bf215546Sopenharmony_ci         view.base_level = 0;
5010bf215546Sopenharmony_ci      } else if (res->base.b.target == PIPE_TEXTURE_1D_ARRAY)
5011bf215546Sopenharmony_ci         surf.dim = ISL_SURF_DIM_2D;
5012bf215546Sopenharmony_ci   }
5013bf215546Sopenharmony_ci
5014bf215546Sopenharmony_ci   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
5015bf215546Sopenharmony_ci   struct crocus_bo *aux_bo = NULL;
5016bf215546Sopenharmony_ci   uint32_t aux_offset = 0;
5017bf215546Sopenharmony_ci   struct isl_surf *aux_surf = NULL;
5018bf215546Sopenharmony_ci   if (aux_usage != ISL_AUX_USAGE_NONE) {
5019bf215546Sopenharmony_ci      aux_surf = &res->aux.surf;
5020bf215546Sopenharmony_ci      aux_offset = res->aux.offset;
5021bf215546Sopenharmony_ci      aux_bo = res->aux.bo;
5022bf215546Sopenharmony_ci
5023bf215546Sopenharmony_ci      clear_color = crocus_resource_get_clear_color(res);
5024bf215546Sopenharmony_ci   }
5025bf215546Sopenharmony_ci
5026bf215546Sopenharmony_ci   isl_surf_fill_state(isl_dev, surf_state,
5027bf215546Sopenharmony_ci                       .surf = &surf,
5028bf215546Sopenharmony_ci                       .view = &view,
5029bf215546Sopenharmony_ci                       .address = crocus_state_reloc(batch,
5030bf215546Sopenharmony_ci                                                     addr_offset + isl_dev->ss.addr_offset,
5031bf215546Sopenharmony_ci                                                     res->bo, offset_B, reloc),
5032bf215546Sopenharmony_ci                       .aux_surf = aux_surf,
5033bf215546Sopenharmony_ci                       .aux_usage = aux_usage,
5034bf215546Sopenharmony_ci                       .aux_address = aux_offset,
5035bf215546Sopenharmony_ci                       .mocs = crocus_mocs(res->bo, isl_dev),
5036bf215546Sopenharmony_ci                       .clear_color = clear_color,
5037bf215546Sopenharmony_ci                       .use_clear_address = false,
5038bf215546Sopenharmony_ci                       .clear_address = 0,
5039bf215546Sopenharmony_ci                       .x_offset_sa = tile_x_sa,
5040bf215546Sopenharmony_ci                       .y_offset_sa = tile_y_sa,
5041bf215546Sopenharmony_ci#if GFX_VER <= 5
5042bf215546Sopenharmony_ci                       .blend_enable = blend_enable,
5043bf215546Sopenharmony_ci                       .write_disables = write_disables,
5044bf215546Sopenharmony_ci#endif
5045bf215546Sopenharmony_ci      );
5046bf215546Sopenharmony_ci
5047bf215546Sopenharmony_ci   if (aux_surf) {
5048bf215546Sopenharmony_ci      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
5049bf215546Sopenharmony_ci       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
5050bf215546Sopenharmony_ci       * contain other control information.  Since buffer addresses are always
5051bf215546Sopenharmony_ci       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
5052bf215546Sopenharmony_ci       * an ordinary reloc to do the necessary address translation.
5053bf215546Sopenharmony_ci       *
5054bf215546Sopenharmony_ci       * FIXME: move to the point of assignment.
5055bf215546Sopenharmony_ci       */
5056bf215546Sopenharmony_ci      if (GFX_VER == 8) {
5057bf215546Sopenharmony_ci         uint64_t *aux_addr = (uint64_t *)(surf_state + (isl_dev->ss.aux_addr_offset / 4));
5058bf215546Sopenharmony_ci         *aux_addr = crocus_state_reloc(batch,
5059bf215546Sopenharmony_ci                                        addr_offset + isl_dev->ss.aux_addr_offset,
5060bf215546Sopenharmony_ci                                        aux_bo, *aux_addr,
5061bf215546Sopenharmony_ci                                        reloc);
5062bf215546Sopenharmony_ci      } else {
5063bf215546Sopenharmony_ci         uint32_t *aux_addr = surf_state + (isl_dev->ss.aux_addr_offset / 4);
5064bf215546Sopenharmony_ci         *aux_addr = crocus_state_reloc(batch,
5065bf215546Sopenharmony_ci                                        addr_offset + isl_dev->ss.aux_addr_offset,
5066bf215546Sopenharmony_ci                                        aux_bo, *aux_addr,
5067bf215546Sopenharmony_ci                                        reloc);
5068bf215546Sopenharmony_ci      }
5069bf215546Sopenharmony_ci   }
5070bf215546Sopenharmony_ci
5071bf215546Sopenharmony_ci}
5072bf215546Sopenharmony_ci
5073bf215546Sopenharmony_cistatic uint32_t
5074bf215546Sopenharmony_ciemit_surface(struct crocus_batch *batch,
5075bf215546Sopenharmony_ci             struct crocus_surface *surf,
5076bf215546Sopenharmony_ci             enum isl_aux_usage aux_usage,
5077bf215546Sopenharmony_ci             bool blend_enable,
5078bf215546Sopenharmony_ci             uint32_t write_disables)
5079bf215546Sopenharmony_ci{
5080bf215546Sopenharmony_ci   struct isl_device *isl_dev = &batch->screen->isl_dev;
5081bf215546Sopenharmony_ci   struct crocus_resource *res = (struct crocus_resource *)surf->base.texture;
5082bf215546Sopenharmony_ci   struct isl_view *view = &surf->view;
5083bf215546Sopenharmony_ci   uint32_t offset = 0;
5084bf215546Sopenharmony_ci   enum pipe_texture_target target = res->base.b.target;
5085bf215546Sopenharmony_ci   bool adjust_surf = false;
5086bf215546Sopenharmony_ci
5087bf215546Sopenharmony_ci   if (GFX_VER == 4 && target == PIPE_TEXTURE_CUBE)
5088bf215546Sopenharmony_ci      adjust_surf = true;
5089bf215546Sopenharmony_ci
5090bf215546Sopenharmony_ci   if (surf->align_res)
5091bf215546Sopenharmony_ci      res = (struct crocus_resource *)surf->align_res;
5092bf215546Sopenharmony_ci
5093bf215546Sopenharmony_ci   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset);
5094bf215546Sopenharmony_ci
5095bf215546Sopenharmony_ci   emit_surface_state(batch, res, &surf->surf, adjust_surf, view, true,
5096bf215546Sopenharmony_ci                      aux_usage, blend_enable,
5097bf215546Sopenharmony_ci                      write_disables,
5098bf215546Sopenharmony_ci                      surf_state, offset);
5099bf215546Sopenharmony_ci   return offset;
5100bf215546Sopenharmony_ci}
5101bf215546Sopenharmony_ci
5102bf215546Sopenharmony_cistatic uint32_t
5103bf215546Sopenharmony_ciemit_rt_surface(struct crocus_batch *batch,
5104bf215546Sopenharmony_ci                struct crocus_surface *surf,
5105bf215546Sopenharmony_ci                enum isl_aux_usage aux_usage)
5106bf215546Sopenharmony_ci{
5107bf215546Sopenharmony_ci   struct isl_device *isl_dev = &batch->screen->isl_dev;
5108bf215546Sopenharmony_ci   struct crocus_resource *res = (struct crocus_resource *)surf->base.texture;
5109bf215546Sopenharmony_ci   struct isl_view *view = &surf->read_view;
5110bf215546Sopenharmony_ci   uint32_t offset = 0;
5111bf215546Sopenharmony_ci   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset);
5112bf215546Sopenharmony_ci
5113bf215546Sopenharmony_ci   emit_surface_state(batch, res, &surf->surf, true, view, false,
5114bf215546Sopenharmony_ci                      aux_usage, 0, false,
5115bf215546Sopenharmony_ci                      surf_state, offset);
5116bf215546Sopenharmony_ci   return offset;
5117bf215546Sopenharmony_ci}
5118bf215546Sopenharmony_ci
5119bf215546Sopenharmony_cistatic uint32_t
5120bf215546Sopenharmony_ciemit_grid(struct crocus_context *ice,
5121bf215546Sopenharmony_ci          struct crocus_batch *batch)
5122bf215546Sopenharmony_ci{
5123bf215546Sopenharmony_ci   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
5124bf215546Sopenharmony_ci   uint32_t offset = 0;
5125bf215546Sopenharmony_ci   struct crocus_state_ref *grid_ref = &ice->state.grid_size;
5126bf215546Sopenharmony_ci   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
5127bf215546Sopenharmony_ci                                       isl_dev->ss.align, &offset);
5128bf215546Sopenharmony_ci   isl_buffer_fill_state(isl_dev, surf_state,
5129bf215546Sopenharmony_ci                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
5130bf215546Sopenharmony_ci                                                       crocus_resource_bo(grid_ref->res),
5131bf215546Sopenharmony_ci                                                       grid_ref->offset,
5132bf215546Sopenharmony_ci                                                       RELOC_32BIT),
5133bf215546Sopenharmony_ci                         .size_B = 12,
5134bf215546Sopenharmony_ci                         .format = ISL_FORMAT_RAW,
5135bf215546Sopenharmony_ci                         .stride_B = 1,
5136bf215546Sopenharmony_ci                         .mocs = crocus_mocs(crocus_resource_bo(grid_ref->res), isl_dev));
5137bf215546Sopenharmony_ci   return offset;
5138bf215546Sopenharmony_ci}
5139bf215546Sopenharmony_ci
5140bf215546Sopenharmony_cistatic uint32_t
5141bf215546Sopenharmony_ciemit_ubo_buffer(struct crocus_context *ice,
5142bf215546Sopenharmony_ci                struct crocus_batch *batch,
5143bf215546Sopenharmony_ci                struct pipe_constant_buffer *buffer)
5144bf215546Sopenharmony_ci{
5145bf215546Sopenharmony_ci   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
5146bf215546Sopenharmony_ci   uint32_t offset = 0;
5147bf215546Sopenharmony_ci
5148bf215546Sopenharmony_ci   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
5149bf215546Sopenharmony_ci                                       isl_dev->ss.align, &offset);
5150bf215546Sopenharmony_ci   isl_buffer_fill_state(isl_dev, surf_state,
5151bf215546Sopenharmony_ci                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
5152bf215546Sopenharmony_ci                                                       crocus_resource_bo(buffer->buffer),
5153bf215546Sopenharmony_ci                                                       buffer->buffer_offset,
5154bf215546Sopenharmony_ci                                                       RELOC_32BIT),
5155bf215546Sopenharmony_ci                         .size_B = buffer->buffer_size,
5156bf215546Sopenharmony_ci                         .format = 0,
5157bf215546Sopenharmony_ci                         .swizzle = ISL_SWIZZLE_IDENTITY,
5158bf215546Sopenharmony_ci                         .stride_B = 1,
5159bf215546Sopenharmony_ci                         .mocs = crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev));
5160bf215546Sopenharmony_ci
5161bf215546Sopenharmony_ci   return offset;
5162bf215546Sopenharmony_ci}
5163bf215546Sopenharmony_ci
5164bf215546Sopenharmony_cistatic uint32_t
5165bf215546Sopenharmony_ciemit_ssbo_buffer(struct crocus_context *ice,
5166bf215546Sopenharmony_ci                 struct crocus_batch *batch,
5167bf215546Sopenharmony_ci                 struct pipe_shader_buffer *buffer, bool writeable)
5168bf215546Sopenharmony_ci{
5169bf215546Sopenharmony_ci   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
5170bf215546Sopenharmony_ci   uint32_t offset = 0;
5171bf215546Sopenharmony_ci   uint32_t reloc = RELOC_32BIT;
5172bf215546Sopenharmony_ci
5173bf215546Sopenharmony_ci   if (writeable)
5174bf215546Sopenharmony_ci      reloc |= RELOC_WRITE;
5175bf215546Sopenharmony_ci   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
5176bf215546Sopenharmony_ci                                       isl_dev->ss.align, &offset);
5177bf215546Sopenharmony_ci   isl_buffer_fill_state(isl_dev, surf_state,
5178bf215546Sopenharmony_ci                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
5179bf215546Sopenharmony_ci                                                       crocus_resource_bo(buffer->buffer),
5180bf215546Sopenharmony_ci                                                       buffer->buffer_offset,
5181bf215546Sopenharmony_ci                                                       reloc),
5182bf215546Sopenharmony_ci                         .size_B = buffer->buffer_size,
5183bf215546Sopenharmony_ci                         .format = ISL_FORMAT_RAW,
5184bf215546Sopenharmony_ci                         .swizzle = ISL_SWIZZLE_IDENTITY,
5185bf215546Sopenharmony_ci                         .stride_B = 1,
5186bf215546Sopenharmony_ci                         .mocs = crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev));
5187bf215546Sopenharmony_ci
5188bf215546Sopenharmony_ci   return offset;
5189bf215546Sopenharmony_ci}
5190bf215546Sopenharmony_ci
5191bf215546Sopenharmony_cistatic uint32_t
5192bf215546Sopenharmony_ciemit_sampler_view(struct crocus_context *ice,
5193bf215546Sopenharmony_ci                  struct crocus_batch *batch,
5194bf215546Sopenharmony_ci                  bool for_gather,
5195bf215546Sopenharmony_ci                  struct crocus_sampler_view *isv)
5196bf215546Sopenharmony_ci{
5197bf215546Sopenharmony_ci   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
5198bf215546Sopenharmony_ci   uint32_t offset = 0;
5199bf215546Sopenharmony_ci
5200bf215546Sopenharmony_ci   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
5201bf215546Sopenharmony_ci                                       isl_dev->ss.align, &offset);
5202bf215546Sopenharmony_ci
5203bf215546Sopenharmony_ci   if (isv->base.target == PIPE_BUFFER) {
5204bf215546Sopenharmony_ci      const struct isl_format_layout *fmtl = isl_format_get_layout(isv->view.format);
5205bf215546Sopenharmony_ci      const unsigned cpp = isv->view.format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8;
5206bf215546Sopenharmony_ci      unsigned final_size =
5207bf215546Sopenharmony_ci         MIN3(isv->base.u.buf.size, isv->res->bo->size - isv->res->offset,
5208bf215546Sopenharmony_ci              CROCUS_MAX_TEXTURE_BUFFER_SIZE * cpp);
5209bf215546Sopenharmony_ci      isl_buffer_fill_state(isl_dev, surf_state,
5210bf215546Sopenharmony_ci                            .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
5211bf215546Sopenharmony_ci                                                          isv->res->bo,
5212bf215546Sopenharmony_ci                                                          isv->res->offset + isv->base.u.buf.offset, RELOC_32BIT),
5213bf215546Sopenharmony_ci                            .size_B = final_size,
5214bf215546Sopenharmony_ci                            .format = isv->view.format,
5215bf215546Sopenharmony_ci                            .swizzle = isv->view.swizzle,
5216bf215546Sopenharmony_ci                            .stride_B = cpp,
5217bf215546Sopenharmony_ci                            .mocs = crocus_mocs(isv->res->bo, isl_dev)
5218bf215546Sopenharmony_ci         );
5219bf215546Sopenharmony_ci   } else {
5220bf215546Sopenharmony_ci      enum isl_aux_usage aux_usage =
5221bf215546Sopenharmony_ci         crocus_resource_texture_aux_usage(isv->res);
5222bf215546Sopenharmony_ci
5223bf215546Sopenharmony_ci      emit_surface_state(batch, isv->res, &isv->res->surf, false,
5224bf215546Sopenharmony_ci                         for_gather ? &isv->gather_view : &isv->view,
5225bf215546Sopenharmony_ci                         false, aux_usage, false,
5226bf215546Sopenharmony_ci                         0, surf_state, offset);
5227bf215546Sopenharmony_ci   }
5228bf215546Sopenharmony_ci   return offset;
5229bf215546Sopenharmony_ci}
5230bf215546Sopenharmony_ci
5231bf215546Sopenharmony_cistatic uint32_t
5232bf215546Sopenharmony_ciemit_image_view(struct crocus_context *ice,
5233bf215546Sopenharmony_ci                struct crocus_batch *batch,
5234bf215546Sopenharmony_ci                struct crocus_image_view *iv)
5235bf215546Sopenharmony_ci{
5236bf215546Sopenharmony_ci   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
5237bf215546Sopenharmony_ci   uint32_t offset = 0;
5238bf215546Sopenharmony_ci
5239bf215546Sopenharmony_ci   struct crocus_resource *res = (struct crocus_resource *)iv->base.resource;
5240bf215546Sopenharmony_ci   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
5241bf215546Sopenharmony_ci                                       isl_dev->ss.align, &offset);
5242bf215546Sopenharmony_ci   bool write = iv->base.shader_access & PIPE_IMAGE_ACCESS_WRITE;
5243bf215546Sopenharmony_ci   uint32_t reloc = RELOC_32BIT | (write ? RELOC_WRITE : 0);
5244bf215546Sopenharmony_ci   if (res->base.b.target == PIPE_BUFFER) {
5245bf215546Sopenharmony_ci      const struct isl_format_layout *fmtl = isl_format_get_layout(iv->view.format);
5246bf215546Sopenharmony_ci      const unsigned cpp = iv->view.format == ISL_FORMAT_RAW ? 1 : fmtl->bpb / 8;
5247bf215546Sopenharmony_ci      unsigned final_size =
5248bf215546Sopenharmony_ci         MIN3(iv->base.u.buf.size, res->bo->size - res->offset - iv->base.u.buf.offset,
5249bf215546Sopenharmony_ci              CROCUS_MAX_TEXTURE_BUFFER_SIZE * cpp);
5250bf215546Sopenharmony_ci      isl_buffer_fill_state(isl_dev, surf_state,
5251bf215546Sopenharmony_ci                            .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
5252bf215546Sopenharmony_ci                                                          res->bo,
5253bf215546Sopenharmony_ci                                                          res->offset + iv->base.u.buf.offset, reloc),
5254bf215546Sopenharmony_ci                            .size_B = final_size,
5255bf215546Sopenharmony_ci                            .format = iv->view.format,
5256bf215546Sopenharmony_ci                            .swizzle = iv->view.swizzle,
5257bf215546Sopenharmony_ci                            .stride_B = cpp,
5258bf215546Sopenharmony_ci                            .mocs = crocus_mocs(res->bo, isl_dev)
5259bf215546Sopenharmony_ci         );
5260bf215546Sopenharmony_ci   } else {
5261bf215546Sopenharmony_ci      if (iv->view.format == ISL_FORMAT_RAW) {
5262bf215546Sopenharmony_ci         isl_buffer_fill_state(isl_dev, surf_state,
5263bf215546Sopenharmony_ci                               .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
5264bf215546Sopenharmony_ci                                                             res->bo,
5265bf215546Sopenharmony_ci                                                             res->offset, reloc),
5266bf215546Sopenharmony_ci                               .size_B = res->bo->size - res->offset,
5267bf215546Sopenharmony_ci                               .format = iv->view.format,
5268bf215546Sopenharmony_ci                               .swizzle = iv->view.swizzle,
5269bf215546Sopenharmony_ci                               .stride_B = 1,
5270bf215546Sopenharmony_ci                               .mocs = crocus_mocs(res->bo, isl_dev),
5271bf215546Sopenharmony_ci            );
5272bf215546Sopenharmony_ci
5273bf215546Sopenharmony_ci
5274bf215546Sopenharmony_ci      } else {
5275bf215546Sopenharmony_ci         emit_surface_state(batch, res,
5276bf215546Sopenharmony_ci                            &res->surf, false, &iv->view,
5277bf215546Sopenharmony_ci                            write, 0, false,
5278bf215546Sopenharmony_ci                            0, surf_state, offset);
5279bf215546Sopenharmony_ci      }
5280bf215546Sopenharmony_ci   }
5281bf215546Sopenharmony_ci
5282bf215546Sopenharmony_ci   return offset;
5283bf215546Sopenharmony_ci}
5284bf215546Sopenharmony_ci
#if GFX_VER == 6
/**
 * Stream out a buffer surface state for one transform feedback output.
 *
 * \param batch    batch whose state stream receives the surface state
 * \param so_info  stream output layout of the active shader
 * \param idx      index into so_info->output
 *
 * \return the offset of the new surface state, or 0 when \p idx is out of
 *         range or streamout is not active.
 */
static uint32_t
emit_sol_surface(struct crocus_batch *batch,
                 struct pipe_stream_output_info *so_info,
                 uint32_t idx)
{
   struct crocus_context *ice = batch->ice;

   if (!ice->state.streamout_active || idx >= so_info->num_outputs)
      return 0;

   const struct pipe_stream_output *output = &so_info->output[idx];
   const int buffer = output->output_buffer;
   assert(output->stream == 0);

   struct crocus_resource *buf =
      (struct crocus_resource *)ice->state.so_target[buffer]->buffer;
   const unsigned num_vector_components = output->num_components;
   const unsigned stride_dwords = so_info->stride[buffer];
   const unsigned offset_dwords =
      ice->state.so_target[buffer]->buffer_offset / 4 + output->dst_offset;
   const size_t size_dwords =
      (ice->state.so_target[buffer]->buffer_offset +
       ice->state.so_target[buffer]->buffer_size) / 4;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   //   assert((size_dwords - offset_dwords) / stride_dwords
   //          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   /* Always expose one stride worth of dwords.  If there isn't even room
    * for a single transform feedback output we can't configure the binding
    * table entry to prevent output entirely; we rely on the geometry shader
    * to detect overflow, and this single-output entry only limits the
    * damage in case of a bug.
    */
   unsigned num_elements = stride_dwords;

   /* When at least one output fits, also expose the dwords that remain
    * after it.
    */
   if (size_dwords > offset_dwords + num_vector_components)
      num_elements += size_dwords - offset_dwords - num_vector_components;

   uint32_t surface_format;
   switch (num_vector_components) {
   case 1: surface_format = ISL_FORMAT_R32_FLOAT;          break;
   case 2: surface_format = ISL_FORMAT_R32G32_FLOAT;       break;
   case 3: surface_format = ISL_FORMAT_R32G32B32_FLOAT;    break;
   case 4: surface_format = ISL_FORMAT_R32G32B32A32_FLOAT; break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t ss_offset = 0;
   uint32_t *surf_state =
      stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &ss_offset);

   isl_buffer_fill_state(isl_dev, surf_state,
                         .address = crocus_state_reloc(batch,
                                                       ss_offset + isl_dev->ss.addr_offset,
                                                       crocus_resource_bo(&buf->base.b),
                                                       offset_dwords * 4,
                                                       RELOC_32BIT|RELOC_WRITE),
                         .size_B = num_elements * 4,
                         .stride_B = stride_dwords * 4,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .format = surface_format);
   return ss_offset;
}
#endif
5364bf215546Sopenharmony_ci
/*
 * Iterate `index` over every slot of binding-table group `group`, running
 * the statement that follows only for slots whose binding table index is
 * not CROCUS_SURFACE_NOT_USED.
 *
 * A `bt` binding table pointer must be in scope wherever this is used.
 * The expansion ends in a bare `if`, so an `else` placed directly after the
 * loop body would attach to that hidden `if`.
 */
#define foreach_surface_used(index, group)                      \
   for (int index = 0; index < bt->sizes[group]; index++)       \
      if (crocus_group_index_to_bti(bt, group, index) !=        \
          CROCUS_SURFACE_NOT_USED)
5369bf215546Sopenharmony_ci
5370bf215546Sopenharmony_cistatic void
5371bf215546Sopenharmony_cicrocus_populate_binding_table(struct crocus_context *ice,
5372bf215546Sopenharmony_ci                              struct crocus_batch *batch,
5373bf215546Sopenharmony_ci                              gl_shader_stage stage, bool ff_gs)
5374bf215546Sopenharmony_ci{
5375bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader = ff_gs ? ice->shaders.ff_gs_prog : ice->shaders.prog[stage];
5376bf215546Sopenharmony_ci   struct crocus_shader_state *shs = ff_gs ? NULL : &ice->state.shaders[stage];
5377bf215546Sopenharmony_ci   if (!shader)
5378bf215546Sopenharmony_ci      return;
5379bf215546Sopenharmony_ci
5380bf215546Sopenharmony_ci   struct crocus_binding_table *bt = &shader->bt;
5381bf215546Sopenharmony_ci   int s = 0;
5382bf215546Sopenharmony_ci   uint32_t *surf_offsets = shader->surf_offset;
5383bf215546Sopenharmony_ci
5384bf215546Sopenharmony_ci#if GFX_VER < 8
5385bf215546Sopenharmony_ci   const struct shader_info *info = crocus_get_shader_info(ice, stage);
5386bf215546Sopenharmony_ci#endif
5387bf215546Sopenharmony_ci
5388bf215546Sopenharmony_ci   if (stage == MESA_SHADER_FRAGMENT) {
5389bf215546Sopenharmony_ci      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5390bf215546Sopenharmony_ci      /* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. */
5391bf215546Sopenharmony_ci      if (cso_fb->nr_cbufs) {
5392bf215546Sopenharmony_ci         for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
5393bf215546Sopenharmony_ci            uint32_t write_disables = 0;
5394bf215546Sopenharmony_ci            bool blend_enable = false;
5395bf215546Sopenharmony_ci#if GFX_VER <= 5
5396bf215546Sopenharmony_ci            const struct pipe_rt_blend_state *rt =
5397bf215546Sopenharmony_ci               &ice->state.cso_blend->cso.rt[ice->state.cso_blend->cso.independent_blend_enable ? i : 0];
5398bf215546Sopenharmony_ci            struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
5399bf215546Sopenharmony_ci            struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
5400bf215546Sopenharmony_ci            write_disables |= (rt->colormask & PIPE_MASK_A) ? 0x0 : 0x8;
5401bf215546Sopenharmony_ci            write_disables |= (rt->colormask & PIPE_MASK_R) ? 0x0 : 0x4;
5402bf215546Sopenharmony_ci            write_disables |= (rt->colormask & PIPE_MASK_G) ? 0x0 : 0x2;
5403bf215546Sopenharmony_ci            write_disables |= (rt->colormask & PIPE_MASK_B) ? 0x0 : 0x1;
5404bf215546Sopenharmony_ci            /* Gen4/5 can't handle blending off when a dual src blend wm is enabled. */
5405bf215546Sopenharmony_ci            blend_enable = rt->blend_enable || wm_prog_data->dual_src_blend;
5406bf215546Sopenharmony_ci#endif
5407bf215546Sopenharmony_ci            if (cso_fb->cbufs[i]) {
5408bf215546Sopenharmony_ci               surf_offsets[s] = emit_surface(batch,
5409bf215546Sopenharmony_ci                                              (struct crocus_surface *)cso_fb->cbufs[i],
5410bf215546Sopenharmony_ci                                              ice->state.draw_aux_usage[i],
5411bf215546Sopenharmony_ci                                              blend_enable,
5412bf215546Sopenharmony_ci                                              write_disables);
5413bf215546Sopenharmony_ci            } else {
5414bf215546Sopenharmony_ci               emit_null_fb_surface(batch, ice, &surf_offsets[s]);
5415bf215546Sopenharmony_ci            }
5416bf215546Sopenharmony_ci            s++;
5417bf215546Sopenharmony_ci         }
5418bf215546Sopenharmony_ci      } else {
5419bf215546Sopenharmony_ci         emit_null_fb_surface(batch, ice, &surf_offsets[s]);
5420bf215546Sopenharmony_ci         s++;
5421bf215546Sopenharmony_ci      }
5422bf215546Sopenharmony_ci
5423bf215546Sopenharmony_ci      foreach_surface_used(i, CROCUS_SURFACE_GROUP_RENDER_TARGET_READ) {
5424bf215546Sopenharmony_ci         struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5425bf215546Sopenharmony_ci         if (cso_fb->cbufs[i]) {
5426bf215546Sopenharmony_ci            surf_offsets[s++] = emit_rt_surface(batch,
5427bf215546Sopenharmony_ci                                                (struct crocus_surface *)cso_fb->cbufs[i],
5428bf215546Sopenharmony_ci                                                ice->state.draw_aux_usage[i]);
5429bf215546Sopenharmony_ci         }
5430bf215546Sopenharmony_ci      }
5431bf215546Sopenharmony_ci   }
5432bf215546Sopenharmony_ci
5433bf215546Sopenharmony_ci   if (stage == MESA_SHADER_COMPUTE) {
5434bf215546Sopenharmony_ci      foreach_surface_used(i, CROCUS_SURFACE_GROUP_CS_WORK_GROUPS) {
5435bf215546Sopenharmony_ci         surf_offsets[s] = emit_grid(ice, batch);
5436bf215546Sopenharmony_ci         s++;
5437bf215546Sopenharmony_ci      }
5438bf215546Sopenharmony_ci   }
5439bf215546Sopenharmony_ci
5440bf215546Sopenharmony_ci#if GFX_VER == 6
5441bf215546Sopenharmony_ci   if (stage == MESA_SHADER_GEOMETRY) {
5442bf215546Sopenharmony_ci      struct pipe_stream_output_info *so_info;
5443bf215546Sopenharmony_ci      if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
5444bf215546Sopenharmony_ci         so_info = &ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]->stream_output;
5445bf215546Sopenharmony_ci      else
5446bf215546Sopenharmony_ci         so_info = &ice->shaders.uncompiled[MESA_SHADER_VERTEX]->stream_output;
5447bf215546Sopenharmony_ci
5448bf215546Sopenharmony_ci      foreach_surface_used(i, CROCUS_SURFACE_GROUP_SOL) {
5449bf215546Sopenharmony_ci         surf_offsets[s] = emit_sol_surface(batch, so_info, i);
5450bf215546Sopenharmony_ci         s++;
5451bf215546Sopenharmony_ci      }
5452bf215546Sopenharmony_ci   }
5453bf215546Sopenharmony_ci#endif
5454bf215546Sopenharmony_ci
5455bf215546Sopenharmony_ci   foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE) {
5456bf215546Sopenharmony_ci      struct crocus_sampler_view *view = shs->textures[i];
5457bf215546Sopenharmony_ci      if (view)
5458bf215546Sopenharmony_ci         surf_offsets[s] = emit_sampler_view(ice, batch, false, view);
5459bf215546Sopenharmony_ci      else
5460bf215546Sopenharmony_ci         emit_null_surface(batch, &surf_offsets[s]);
5461bf215546Sopenharmony_ci      s++;
5462bf215546Sopenharmony_ci   }
5463bf215546Sopenharmony_ci
5464bf215546Sopenharmony_ci#if GFX_VER < 8
5465bf215546Sopenharmony_ci   if (info && info->uses_texture_gather) {
5466bf215546Sopenharmony_ci      foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE_GATHER) {
5467bf215546Sopenharmony_ci         struct crocus_sampler_view *view = shs->textures[i];
5468bf215546Sopenharmony_ci         if (view)
5469bf215546Sopenharmony_ci            surf_offsets[s] = emit_sampler_view(ice, batch, true, view);
5470bf215546Sopenharmony_ci         else
5471bf215546Sopenharmony_ci            emit_null_surface(batch, &surf_offsets[s]);
5472bf215546Sopenharmony_ci         s++;
5473bf215546Sopenharmony_ci      }
5474bf215546Sopenharmony_ci   }
5475bf215546Sopenharmony_ci#endif
5476bf215546Sopenharmony_ci
5477bf215546Sopenharmony_ci   foreach_surface_used(i, CROCUS_SURFACE_GROUP_IMAGE) {
5478bf215546Sopenharmony_ci      struct crocus_image_view *view = &shs->image[i];
5479bf215546Sopenharmony_ci      if (view->base.resource)
5480bf215546Sopenharmony_ci         surf_offsets[s] = emit_image_view(ice, batch, view);
5481bf215546Sopenharmony_ci      else
5482bf215546Sopenharmony_ci         emit_null_surface(batch, &surf_offsets[s]);
5483bf215546Sopenharmony_ci      s++;
5484bf215546Sopenharmony_ci   }
5485bf215546Sopenharmony_ci   foreach_surface_used(i, CROCUS_SURFACE_GROUP_UBO) {
5486bf215546Sopenharmony_ci      if (shs->constbufs[i].buffer)
5487bf215546Sopenharmony_ci         surf_offsets[s] = emit_ubo_buffer(ice, batch, &shs->constbufs[i]);
5488bf215546Sopenharmony_ci      else
5489bf215546Sopenharmony_ci         emit_null_surface(batch, &surf_offsets[s]);
5490bf215546Sopenharmony_ci      s++;
5491bf215546Sopenharmony_ci   }
5492bf215546Sopenharmony_ci   foreach_surface_used(i, CROCUS_SURFACE_GROUP_SSBO) {
5493bf215546Sopenharmony_ci      if (shs->ssbo[i].buffer)
5494bf215546Sopenharmony_ci         surf_offsets[s] = emit_ssbo_buffer(ice, batch, &shs->ssbo[i],
5495bf215546Sopenharmony_ci                                            !!(shs->writable_ssbos & (1 << i)));
5496bf215546Sopenharmony_ci      else
5497bf215546Sopenharmony_ci         emit_null_surface(batch, &surf_offsets[s]);
5498bf215546Sopenharmony_ci      s++;
5499bf215546Sopenharmony_ci   }
5500bf215546Sopenharmony_ci
5501bf215546Sopenharmony_ci}
5502bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */
/**
 * Copy a binding table into the batch's state stream.
 *
 * \param ice    unused here
 * \param batch  batch receiving the table
 * \param table  binding table contents
 * \param size   size handed to emit_state(); nothing is emitted when zero
 *
 * \return the value returned by emit_state() (called with an alignment
 *         argument of 32), or 0 for an empty table.
 */
static uint32_t
crocus_upload_binding_table(struct crocus_context *ice,
                            struct crocus_batch *batch,
                            uint32_t *table,
                            uint32_t size)
{
   return size == 0 ? 0 : emit_state(batch, table, size, 32);
}
5514bf215546Sopenharmony_ci
5515bf215546Sopenharmony_ci/**
5516bf215546Sopenharmony_ci * Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address.
5517bf215546Sopenharmony_ci */
5518bf215546Sopenharmony_ci
5519bf215546Sopenharmony_cistatic void
5520bf215546Sopenharmony_cicrocus_update_surface_base_address(struct crocus_batch *batch)
5521bf215546Sopenharmony_ci{
5522bf215546Sopenharmony_ci   if (batch->state_base_address_emitted)
5523bf215546Sopenharmony_ci      return;
5524bf215546Sopenharmony_ci
5525bf215546Sopenharmony_ci   UNUSED uint32_t mocs = batch->screen->isl_dev.mocs.internal;
5526bf215546Sopenharmony_ci
5527bf215546Sopenharmony_ci   flush_before_state_base_change(batch);
5528bf215546Sopenharmony_ci
5529bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
5530bf215546Sopenharmony_ci      /* Set base addresses */
5531bf215546Sopenharmony_ci      sba.GeneralStateBaseAddressModifyEnable = true;
5532bf215546Sopenharmony_ci
5533bf215546Sopenharmony_ci#if GFX_VER >= 6
5534bf215546Sopenharmony_ci      sba.DynamicStateBaseAddressModifyEnable = true;
5535bf215546Sopenharmony_ci      sba.DynamicStateBaseAddress = ro_bo(batch->state.bo, 0);
5536bf215546Sopenharmony_ci#endif
5537bf215546Sopenharmony_ci
5538bf215546Sopenharmony_ci      sba.SurfaceStateBaseAddressModifyEnable = true;
5539bf215546Sopenharmony_ci      sba.SurfaceStateBaseAddress = ro_bo(batch->state.bo, 0);
5540bf215546Sopenharmony_ci
5541bf215546Sopenharmony_ci      sba.IndirectObjectBaseAddressModifyEnable = true;
5542bf215546Sopenharmony_ci
5543bf215546Sopenharmony_ci#if GFX_VER >= 5
5544bf215546Sopenharmony_ci      sba.InstructionBaseAddressModifyEnable = true;
5545bf215546Sopenharmony_ci      sba.InstructionBaseAddress = ro_bo(batch->ice->shaders.cache_bo, 0); // TODO!
5546bf215546Sopenharmony_ci#endif
5547bf215546Sopenharmony_ci
5548bf215546Sopenharmony_ci      /* Set buffer sizes on Gen8+ or upper bounds on Gen4-7 */
5549bf215546Sopenharmony_ci#if GFX_VER == 8
5550bf215546Sopenharmony_ci      sba.GeneralStateBufferSize   = 0xfffff;
5551bf215546Sopenharmony_ci      sba.IndirectObjectBufferSize = 0xfffff;
5552bf215546Sopenharmony_ci      sba.InstructionBufferSize    = 0xfffff;
5553bf215546Sopenharmony_ci      sba.DynamicStateBufferSize   = MAX_STATE_SIZE;
5554bf215546Sopenharmony_ci
5555bf215546Sopenharmony_ci      sba.GeneralStateBufferSizeModifyEnable    = true;
5556bf215546Sopenharmony_ci      sba.DynamicStateBufferSizeModifyEnable    = true;
5557bf215546Sopenharmony_ci      sba.IndirectObjectBufferSizeModifyEnable  = true;
5558bf215546Sopenharmony_ci      sba.InstructionBuffersizeModifyEnable     = true;
5559bf215546Sopenharmony_ci#else
5560bf215546Sopenharmony_ci      sba.GeneralStateAccessUpperBoundModifyEnable = true;
5561bf215546Sopenharmony_ci      sba.IndirectObjectAccessUpperBoundModifyEnable = true;
5562bf215546Sopenharmony_ci
5563bf215546Sopenharmony_ci#if GFX_VER >= 5
5564bf215546Sopenharmony_ci      sba.InstructionAccessUpperBoundModifyEnable = true;
5565bf215546Sopenharmony_ci#endif
5566bf215546Sopenharmony_ci
5567bf215546Sopenharmony_ci#if GFX_VER >= 6
5568bf215546Sopenharmony_ci      /* Dynamic state upper bound.  Although the documentation says that
5569bf215546Sopenharmony_ci       * programming it to zero will cause it to be ignored, that is a lie.
5570bf215546Sopenharmony_ci       * If this isn't programmed to a real bound, the sampler border color
5571bf215546Sopenharmony_ci       * pointer is rejected, causing border color to mysteriously fail.
5572bf215546Sopenharmony_ci       */
5573bf215546Sopenharmony_ci      sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
5574bf215546Sopenharmony_ci      sba.DynamicStateAccessUpperBoundModifyEnable = true;
5575bf215546Sopenharmony_ci#else
5576bf215546Sopenharmony_ci      /* Same idea but using General State Base Address on Gen4-5 */
5577bf215546Sopenharmony_ci      sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
5578bf215546Sopenharmony_ci#endif
5579bf215546Sopenharmony_ci#endif
5580bf215546Sopenharmony_ci
5581bf215546Sopenharmony_ci#if GFX_VER >= 6
5582bf215546Sopenharmony_ci      /* The hardware appears to pay attention to the MOCS fields even
5583bf215546Sopenharmony_ci       * if you don't set the "Address Modify Enable" bit for the base.
5584bf215546Sopenharmony_ci       */
5585bf215546Sopenharmony_ci      sba.GeneralStateMOCS            = mocs;
5586bf215546Sopenharmony_ci      sba.StatelessDataPortAccessMOCS = mocs;
5587bf215546Sopenharmony_ci      sba.DynamicStateMOCS            = mocs;
5588bf215546Sopenharmony_ci      sba.IndirectObjectMOCS          = mocs;
5589bf215546Sopenharmony_ci      sba.InstructionMOCS             = mocs;
5590bf215546Sopenharmony_ci      sba.SurfaceStateMOCS            = mocs;
5591bf215546Sopenharmony_ci#endif
5592bf215546Sopenharmony_ci   }
5593bf215546Sopenharmony_ci
5594bf215546Sopenharmony_ci   flush_after_state_base_change(batch);
5595bf215546Sopenharmony_ci
5596bf215546Sopenharmony_ci   /* According to section 3.6.1 of VOL1 of the 965 PRM,
5597bf215546Sopenharmony_ci    * STATE_BASE_ADDRESS updates require a reissue of:
5598bf215546Sopenharmony_ci    *
5599bf215546Sopenharmony_ci    * 3DSTATE_PIPELINE_POINTERS
5600bf215546Sopenharmony_ci    * 3DSTATE_BINDING_TABLE_POINTERS
5601bf215546Sopenharmony_ci    * MEDIA_STATE_POINTERS
5602bf215546Sopenharmony_ci    *
5603bf215546Sopenharmony_ci    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
5605bf215546Sopenharmony_ci    *
5606bf215546Sopenharmony_ci    * 3DSTATE_CC_POINTERS
5607bf215546Sopenharmony_ci    * 3DSTATE_BINDING_TABLE_POINTERS
5608bf215546Sopenharmony_ci    * 3DSTATE_SAMPLER_STATE_POINTERS
5609bf215546Sopenharmony_ci    * 3DSTATE_VIEWPORT_STATE_POINTERS
5610bf215546Sopenharmony_ci    * MEDIA_STATE_POINTERS
5611bf215546Sopenharmony_ci    *
5612bf215546Sopenharmony_ci    * Those are always reissued following SBA updates anyway (new
5613bf215546Sopenharmony_ci    * batch time), except in the case of the program cache BO
5614bf215546Sopenharmony_ci    * changing.  Having a separate state flag makes the sequence more
5615bf215546Sopenharmony_ci    * obvious.
5616bf215546Sopenharmony_ci    */
5617bf215546Sopenharmony_ci#if GFX_VER <= 5
5618bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS | CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;
5619bf215546Sopenharmony_ci#elif GFX_VER == 6
5620bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS | CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS;
5621bf215546Sopenharmony_ci#endif
5622bf215546Sopenharmony_ci   batch->state_base_address_emitted = true;
5623bf215546Sopenharmony_ci}
5624bf215546Sopenharmony_ci
/**
 * Compute the depth range [*zmin, *zmax] for a viewport.
 *
 * When window_space_position is set the range is forced to [0, 1] and the
 * viewport is not consulted.  Otherwise the computation is delegated to
 * util_viewport_zmin_zmax() with the given viewport and halfz flag.
 */
static inline void
crocus_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
                          bool window_space_position, float *zmin, float *zmax)
{
   if (!window_space_position) {
      util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
   } else {
      /* Window-space positions bypass the viewport: use the full range. */
      *zmin = 0.f;
      *zmax = 1.f;
   }
}
5636bf215546Sopenharmony_ci
/**
 * Push constant buffer ranges gathered for one shader stage.
 *
 * Filled in by setup_constant_buffers() and read by
 * emit_push_constant_packets().
 */
struct push_bos {
   /* Packed list of the non-empty ranges.  setup_constant_buffers() only
    * writes the first buffer_count entries.
    */
   struct {
      /* Address programmed as the buffer for this range. */
      struct crocus_address addr;
      /* Copied from the length of the matching brw_ubo_range. */
      uint32_t length;
   } buffers[4];
   /* Number of valid entries at the start of buffers[]. */
   int buffer_count;
   /* Largest range length seen.  setup_constant_buffers() only ever raises
    * it, so it presumably relies on the caller zero-initializing the
    * struct -- TODO confirm against the call site.
    */
   uint32_t max_length;
};
5645bf215546Sopenharmony_ci
5646bf215546Sopenharmony_ci#if GFX_VER >= 6
5647bf215546Sopenharmony_cistatic void
5648bf215546Sopenharmony_cisetup_constant_buffers(struct crocus_context *ice,
5649bf215546Sopenharmony_ci                       struct crocus_batch *batch,
5650bf215546Sopenharmony_ci                       int stage,
5651bf215546Sopenharmony_ci                       struct push_bos *push_bos)
5652bf215546Sopenharmony_ci{
5653bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[stage];
5654bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
5655bf215546Sopenharmony_ci   struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
5656bf215546Sopenharmony_ci
5657bf215546Sopenharmony_ci   uint32_t push_range_sum = 0;
5658bf215546Sopenharmony_ci
5659bf215546Sopenharmony_ci   int n = 0;
5660bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++) {
5661bf215546Sopenharmony_ci      const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
5662bf215546Sopenharmony_ci
5663bf215546Sopenharmony_ci      if (range->length == 0)
5664bf215546Sopenharmony_ci         continue;
5665bf215546Sopenharmony_ci
5666bf215546Sopenharmony_ci      push_range_sum += range->length;
5667bf215546Sopenharmony_ci
5668bf215546Sopenharmony_ci      if (range->length > push_bos->max_length)
5669bf215546Sopenharmony_ci         push_bos->max_length = range->length;
5670bf215546Sopenharmony_ci
5671bf215546Sopenharmony_ci      /* Range block is a binding table index, map back to UBO index. */
5672bf215546Sopenharmony_ci      unsigned block_index = crocus_bti_to_group_index(
5673bf215546Sopenharmony_ci         &shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block);
5674bf215546Sopenharmony_ci      assert(block_index != CROCUS_SURFACE_NOT_USED);
5675bf215546Sopenharmony_ci
5676bf215546Sopenharmony_ci      struct pipe_constant_buffer *cbuf = &shs->constbufs[block_index];
5677bf215546Sopenharmony_ci      struct crocus_resource *res = (void *) cbuf->buffer;
5678bf215546Sopenharmony_ci
5679bf215546Sopenharmony_ci      assert(cbuf->buffer_offset % 32 == 0);
5680bf215546Sopenharmony_ci
5681bf215546Sopenharmony_ci      push_bos->buffers[n].length = range->length;
5682bf215546Sopenharmony_ci      push_bos->buffers[n].addr =
5683bf215546Sopenharmony_ci         res ? ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset)
5684bf215546Sopenharmony_ci         : ro_bo(batch->ice->workaround_bo,
5685bf215546Sopenharmony_ci                 batch->ice->workaround_offset);
5686bf215546Sopenharmony_ci      n++;
5687bf215546Sopenharmony_ci   }
5688bf215546Sopenharmony_ci
5689bf215546Sopenharmony_ci   /* From the 3DSTATE_CONSTANT_XS and 3DSTATE_CONSTANT_ALL programming notes:
5690bf215546Sopenharmony_ci    *
5691bf215546Sopenharmony_ci    *    "The sum of all four read length fields must be less than or
5692bf215546Sopenharmony_ci    *    equal to the size of 64."
5693bf215546Sopenharmony_ci    */
5694bf215546Sopenharmony_ci   assert(push_range_sum <= 64);
5695bf215546Sopenharmony_ci
5696bf215546Sopenharmony_ci   push_bos->buffer_count = n;
5697bf215546Sopenharmony_ci}
5698bf215546Sopenharmony_ci
#if GFX_VER == 7
/**
 * Emit the "vs workaround" PIPE_CONTROL: an immediate write combined with a
 * depth stall, targeting the context's workaround BO at its workaround
 * offset.  The call sites in this file guard it with an
 * INTEL_PLATFORM_IVB check.
 */
static void
gen7_emit_vs_workaround_flush(struct crocus_batch *batch)
{
   struct crocus_context *ice = batch->ice;

   crocus_emit_pipe_control_write(batch, "vs workaround",
                                  PIPE_CONTROL_DEPTH_STALL |
                                  PIPE_CONTROL_WRITE_IMMEDIATE,
                                  ice->workaround_bo,
                                  ice->workaround_offset, 0);
}
#endif
5711bf215546Sopenharmony_ci
5712bf215546Sopenharmony_cistatic void
5713bf215546Sopenharmony_ciemit_push_constant_packets(struct crocus_context *ice,
5714bf215546Sopenharmony_ci                           struct crocus_batch *batch,
5715bf215546Sopenharmony_ci                           int stage,
5716bf215546Sopenharmony_ci                           const struct push_bos *push_bos)
5717bf215546Sopenharmony_ci{
5718bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
5719bf215546Sopenharmony_ci   struct brw_stage_prog_data *prog_data = shader ? (void *) shader->prog_data : NULL;
5720bf215546Sopenharmony_ci   UNUSED uint32_t mocs = crocus_mocs(NULL, &batch->screen->isl_dev);
5721bf215546Sopenharmony_ci
5722bf215546Sopenharmony_ci#if GFX_VER == 7
5723bf215546Sopenharmony_ci   if (stage == MESA_SHADER_VERTEX) {
5724bf215546Sopenharmony_ci      if (batch->screen->devinfo.platform == INTEL_PLATFORM_IVB)
5725bf215546Sopenharmony_ci         gen7_emit_vs_workaround_flush(batch);
5726bf215546Sopenharmony_ci   }
5727bf215546Sopenharmony_ci#endif
5728bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
5729bf215546Sopenharmony_ci      pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
5730bf215546Sopenharmony_ci#if GFX_VER >= 7
5731bf215546Sopenharmony_ci#if GFX_VER != 8
5732bf215546Sopenharmony_ci      /* MOCS is MBZ on Gen8 so we skip it there */
5733bf215546Sopenharmony_ci      pkt.ConstantBody.MOCS = mocs;
5734bf215546Sopenharmony_ci#endif
5735bf215546Sopenharmony_ci
5736bf215546Sopenharmony_ci      if (prog_data) {
5737bf215546Sopenharmony_ci         /* The Skylake PRM contains the following restriction:
5738bf215546Sopenharmony_ci          *
5739bf215546Sopenharmony_ci          *    "The driver must ensure The following case does not occur
5740bf215546Sopenharmony_ci          *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
5741bf215546Sopenharmony_ci          *     buffer 3 read length equal to zero committed followed by a
5742bf215546Sopenharmony_ci          *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
5743bf215546Sopenharmony_ci          *     zero committed."
5744bf215546Sopenharmony_ci          *
5745bf215546Sopenharmony_ci          * To avoid this, we program the buffers in the highest slots.
5746bf215546Sopenharmony_ci          * This way, slot 0 is only used if slot 3 is also used.
5747bf215546Sopenharmony_ci          */
5748bf215546Sopenharmony_ci         int n = push_bos->buffer_count;
5749bf215546Sopenharmony_ci         assert(n <= 4);
5750bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
5751bf215546Sopenharmony_ci         const unsigned shift = 4 - n;
5752bf215546Sopenharmony_ci#else
5753bf215546Sopenharmony_ci         const unsigned shift = 0;
5754bf215546Sopenharmony_ci#endif
5755bf215546Sopenharmony_ci         for (int i = 0; i < n; i++) {
5756bf215546Sopenharmony_ci            pkt.ConstantBody.ReadLength[i + shift] =
5757bf215546Sopenharmony_ci               push_bos->buffers[i].length;
5758bf215546Sopenharmony_ci            pkt.ConstantBody.Buffer[i + shift] = push_bos->buffers[i].addr;
5759bf215546Sopenharmony_ci         }
5760bf215546Sopenharmony_ci      }
5761bf215546Sopenharmony_ci#else
5762bf215546Sopenharmony_ci      if (prog_data) {
5763bf215546Sopenharmony_ci         int n = push_bos->buffer_count;
5764bf215546Sopenharmony_ci         assert (n <= 1);
5765bf215546Sopenharmony_ci         if (n == 1) {
5766bf215546Sopenharmony_ci            pkt.Buffer0Valid = true;
5767bf215546Sopenharmony_ci            pkt.ConstantBody.PointertoConstantBuffer0 = push_bos->buffers[0].addr.offset;
5768bf215546Sopenharmony_ci            pkt.ConstantBody.ConstantBuffer0ReadLength = push_bos->buffers[0].length - 1;
5769bf215546Sopenharmony_ci         }
5770bf215546Sopenharmony_ci      }
5771bf215546Sopenharmony_ci#endif
5772bf215546Sopenharmony_ci   }
5773bf215546Sopenharmony_ci}
5774bf215546Sopenharmony_ci
5775bf215546Sopenharmony_ci#endif
5776bf215546Sopenharmony_ci
/* DEPTH_STENCIL_GENXML is the generation-specific GENXML structure that
 * set_depth_stencil_bits() writes its depth/stencil fields into:
 * 3DSTATE_WM_DEPTH_STENCIL on Gen8, DEPTH_STENCIL_STATE on Gen6-7, and
 * COLOR_CALC_STATE on earlier generations.
 */
#if GFX_VER == 8
typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML;
#elif GFX_VER >= 6
typedef struct GENX(DEPTH_STENCIL_STATE)      DEPTH_STENCIL_GENXML;
#else
typedef struct GENX(COLOR_CALC_STATE)         DEPTH_STENCIL_GENXML;
#endif
5784bf215546Sopenharmony_ci
5785bf215546Sopenharmony_cistatic inline void
5786bf215546Sopenharmony_ciset_depth_stencil_bits(struct crocus_context *ice, DEPTH_STENCIL_GENXML *ds)
5787bf215546Sopenharmony_ci{
5788bf215546Sopenharmony_ci   struct crocus_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
5789bf215546Sopenharmony_ci   ds->DepthTestEnable = cso->cso.depth_enabled;
5790bf215546Sopenharmony_ci   ds->DepthBufferWriteEnable = cso->cso.depth_writemask;
5791bf215546Sopenharmony_ci   ds->DepthTestFunction = translate_compare_func(cso->cso.depth_func);
5792bf215546Sopenharmony_ci
5793bf215546Sopenharmony_ci   ds->StencilFailOp = cso->cso.stencil[0].fail_op;
5794bf215546Sopenharmony_ci   ds->StencilPassDepthFailOp = cso->cso.stencil[0].zfail_op;
5795bf215546Sopenharmony_ci   ds->StencilPassDepthPassOp = cso->cso.stencil[0].zpass_op;
5796bf215546Sopenharmony_ci   ds->StencilTestFunction = translate_compare_func(cso->cso.stencil[0].func);
5797bf215546Sopenharmony_ci
5798bf215546Sopenharmony_ci   ds->StencilTestMask = cso->cso.stencil[0].valuemask;
5799bf215546Sopenharmony_ci   ds->StencilWriteMask = cso->cso.stencil[0].writemask;
5800bf215546Sopenharmony_ci
5801bf215546Sopenharmony_ci   ds->BackfaceStencilFailOp = cso->cso.stencil[1].fail_op;
5802bf215546Sopenharmony_ci   ds->BackfaceStencilPassDepthFailOp = cso->cso.stencil[1].zfail_op;
5803bf215546Sopenharmony_ci   ds->BackfaceStencilPassDepthPassOp = cso->cso.stencil[1].zpass_op;
5804bf215546Sopenharmony_ci   ds->BackfaceStencilTestFunction = translate_compare_func(cso->cso.stencil[1].func);
5805bf215546Sopenharmony_ci
5806bf215546Sopenharmony_ci   ds->BackfaceStencilTestMask = cso->cso.stencil[1].valuemask;
5807bf215546Sopenharmony_ci   ds->BackfaceStencilWriteMask = cso->cso.stencil[1].writemask;
5808bf215546Sopenharmony_ci   ds->DoubleSidedStencilEnable = cso->cso.stencil[1].enabled;
5809bf215546Sopenharmony_ci   ds->StencilTestEnable = cso->cso.stencil[0].enabled;
5810bf215546Sopenharmony_ci   ds->StencilBufferWriteEnable =
5811bf215546Sopenharmony_ci      cso->cso.stencil[0].writemask != 0 ||
5812bf215546Sopenharmony_ci      (cso->cso.stencil[1].enabled && cso->cso.stencil[1].writemask != 0);
5813bf215546Sopenharmony_ci}
5814bf215546Sopenharmony_ci
5815bf215546Sopenharmony_cistatic void
5816bf215546Sopenharmony_ciemit_vertex_buffer_state(struct crocus_batch *batch,
5817bf215546Sopenharmony_ci                         unsigned buffer_id,
5818bf215546Sopenharmony_ci                         struct crocus_bo *bo,
5819bf215546Sopenharmony_ci                         unsigned start_offset,
5820bf215546Sopenharmony_ci                         unsigned end_offset,
5821bf215546Sopenharmony_ci                         unsigned stride,
5822bf215546Sopenharmony_ci                         unsigned step_rate,
5823bf215546Sopenharmony_ci                         uint32_t **map)
5824bf215546Sopenharmony_ci{
5825bf215546Sopenharmony_ci   const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
5826bf215546Sopenharmony_ci   _crocus_pack_state(batch, GENX(VERTEX_BUFFER_STATE), *map, vb) {
5827bf215546Sopenharmony_ci      vb.BufferStartingAddress = ro_bo(bo, start_offset);
5828bf215546Sopenharmony_ci#if GFX_VER >= 8
5829bf215546Sopenharmony_ci      vb.BufferSize = end_offset - start_offset;
5830bf215546Sopenharmony_ci#endif
5831bf215546Sopenharmony_ci      vb.VertexBufferIndex = buffer_id;
5832bf215546Sopenharmony_ci      vb.BufferPitch = stride;
5833bf215546Sopenharmony_ci#if GFX_VER >= 7
5834bf215546Sopenharmony_ci      vb.AddressModifyEnable = true;
5835bf215546Sopenharmony_ci#endif
5836bf215546Sopenharmony_ci#if GFX_VER >= 6
5837bf215546Sopenharmony_ci      vb.MOCS = crocus_mocs(bo, &batch->screen->isl_dev);
5838bf215546Sopenharmony_ci#endif
5839bf215546Sopenharmony_ci#if GFX_VER < 8
5840bf215546Sopenharmony_ci      vb.BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA;
5841bf215546Sopenharmony_ci      vb.InstanceDataStepRate = step_rate;
5842bf215546Sopenharmony_ci#if GFX_VER >= 5
5843bf215546Sopenharmony_ci      vb.EndAddress = ro_bo(bo, end_offset - 1);
5844bf215546Sopenharmony_ci#endif
5845bf215546Sopenharmony_ci#endif
5846bf215546Sopenharmony_ci   }
5847bf215546Sopenharmony_ci   *map += vb_dwords;
5848bf215546Sopenharmony_ci}
5849bf215546Sopenharmony_ci
#if GFX_VER >= 6
/**
 * Return the sample mask to program for the current framebuffer.
 *
 * With a framebuffer sample count of 0 or 1 the result is always 1.
 * Otherwise the context's sample_mask is limited to the low
 * framebuffer.samples bits.
 */
static uint32_t
determine_sample_mask(struct crocus_context *ice)
{
   const uint32_t samples = ice->state.framebuffer.samples;

   if (samples > 1) {
      /* One bit per framebuffer sample. */
      const uint32_t fb_mask = (1 << samples) - 1;
      return ice->state.sample_mask & fb_mask;
   }

   return 1;
}
#endif
5863bf215546Sopenharmony_ci
5864bf215546Sopenharmony_cistatic void
5865bf215546Sopenharmony_cicrocus_upload_dirty_render_state(struct crocus_context *ice,
5866bf215546Sopenharmony_ci                               struct crocus_batch *batch,
5867bf215546Sopenharmony_ci                               const struct pipe_draw_info *draw)
5868bf215546Sopenharmony_ci{
5869bf215546Sopenharmony_ci   uint64_t dirty = ice->state.dirty;
5870bf215546Sopenharmony_ci   uint64_t stage_dirty = ice->state.stage_dirty;
5871bf215546Sopenharmony_ci
5872bf215546Sopenharmony_ci   if (!(dirty & CROCUS_ALL_DIRTY_FOR_RENDER) &&
5873bf215546Sopenharmony_ci       !(stage_dirty & CROCUS_ALL_STAGE_DIRTY_FOR_RENDER))
5874bf215546Sopenharmony_ci      return;
5875bf215546Sopenharmony_ci
5876bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_VF_STATISTICS) {
5877bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
5878bf215546Sopenharmony_ci         vf.StatisticsEnable = true;
5879bf215546Sopenharmony_ci      }
5880bf215546Sopenharmony_ci   }
5881bf215546Sopenharmony_ci
5882bf215546Sopenharmony_ci#if GFX_VER <= 5
5883bf215546Sopenharmony_ci   if (stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS |
5884bf215546Sopenharmony_ci                      CROCUS_STAGE_DIRTY_CONSTANTS_FS)) {
5885bf215546Sopenharmony_ci      bool ret = calculate_curbe_offsets(batch);
5886bf215546Sopenharmony_ci      if (ret) {
5887bf215546Sopenharmony_ci         dirty |= CROCUS_DIRTY_GEN4_CURBE | CROCUS_DIRTY_WM | CROCUS_DIRTY_CLIP;
5888bf215546Sopenharmony_ci         stage_dirty |= CROCUS_STAGE_DIRTY_VS;
5889bf215546Sopenharmony_ci      }
5890bf215546Sopenharmony_ci   }
5891bf215546Sopenharmony_ci
5892bf215546Sopenharmony_ci   if (dirty & (CROCUS_DIRTY_GEN4_CURBE | CROCUS_DIRTY_RASTER) ||
5893bf215546Sopenharmony_ci       stage_dirty & CROCUS_STAGE_DIRTY_VS) {
5894bf215546Sopenharmony_ci     bool ret = crocus_calculate_urb_fence(batch, ice->curbe.total_size,
5895bf215546Sopenharmony_ci                                           brw_vue_prog_data(ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data)->urb_entry_size,
5896bf215546Sopenharmony_ci                                           ((struct brw_sf_prog_data *)ice->shaders.sf_prog->prog_data)->urb_entry_size);
5897bf215546Sopenharmony_ci     if (ret) {
5898bf215546Sopenharmony_ci	dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS | CROCUS_DIRTY_RASTER | CROCUS_DIRTY_CLIP;
5899bf215546Sopenharmony_ci	stage_dirty |= CROCUS_STAGE_DIRTY_GS | CROCUS_STAGE_DIRTY_VS;
5900bf215546Sopenharmony_ci     }
5901bf215546Sopenharmony_ci   }
5902bf215546Sopenharmony_ci#endif
5903bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_CC_VIEWPORT) {
5904bf215546Sopenharmony_ci      const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
5905bf215546Sopenharmony_ci      uint32_t cc_vp_address;
5906bf215546Sopenharmony_ci
5907bf215546Sopenharmony_ci      /* XXX: could avoid streaming for depth_clip [0,1] case. */
5908bf215546Sopenharmony_ci      uint32_t *cc_vp_map =
5909bf215546Sopenharmony_ci         stream_state(batch,
5910bf215546Sopenharmony_ci                      4 * ice->state.num_viewports *
5911bf215546Sopenharmony_ci                      GENX(CC_VIEWPORT_length), 32, &cc_vp_address);
5912bf215546Sopenharmony_ci      for (int i = 0; i < ice->state.num_viewports; i++) {
5913bf215546Sopenharmony_ci         float zmin, zmax;
5914bf215546Sopenharmony_ci         crocus_viewport_zmin_zmax(&ice->state.viewports[i], cso_rast->cso.clip_halfz,
5915bf215546Sopenharmony_ci                                 ice->state.window_space_position,
5916bf215546Sopenharmony_ci                                 &zmin, &zmax);
5917bf215546Sopenharmony_ci         if (cso_rast->cso.depth_clip_near)
5918bf215546Sopenharmony_ci            zmin = 0.0;
5919bf215546Sopenharmony_ci         if (cso_rast->cso.depth_clip_far)
5920bf215546Sopenharmony_ci            zmax = 1.0;
5921bf215546Sopenharmony_ci
5922bf215546Sopenharmony_ci         crocus_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
5923bf215546Sopenharmony_ci            ccv.MinimumDepth = zmin;
5924bf215546Sopenharmony_ci            ccv.MaximumDepth = zmax;
5925bf215546Sopenharmony_ci         }
5926bf215546Sopenharmony_ci
5927bf215546Sopenharmony_ci         cc_vp_map += GENX(CC_VIEWPORT_length);
5928bf215546Sopenharmony_ci      }
5929bf215546Sopenharmony_ci
5930bf215546Sopenharmony_ci#if GFX_VER >= 7
5931bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
5932bf215546Sopenharmony_ci         ptr.CCViewportPointer = cc_vp_address;
5933bf215546Sopenharmony_ci      }
5934bf215546Sopenharmony_ci#elif GFX_VER == 6
5935bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
5936bf215546Sopenharmony_ci         vp.CCViewportStateChange = 1;
5937bf215546Sopenharmony_ci         vp.PointertoCC_VIEWPORT = cc_vp_address;
5938bf215546Sopenharmony_ci      }
5939bf215546Sopenharmony_ci#else
5940bf215546Sopenharmony_ci      ice->state.cc_vp_address = cc_vp_address;
5941bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
5942bf215546Sopenharmony_ci#endif
5943bf215546Sopenharmony_ci   }
5944bf215546Sopenharmony_ci
5945bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_SF_CL_VIEWPORT) {
5946bf215546Sopenharmony_ci      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
5947bf215546Sopenharmony_ci#if GFX_VER >= 7
5948bf215546Sopenharmony_ci      uint32_t sf_cl_vp_address;
5949bf215546Sopenharmony_ci      uint32_t *vp_map =
5950bf215546Sopenharmony_ci         stream_state(batch,
5951bf215546Sopenharmony_ci                      4 * ice->state.num_viewports *
5952bf215546Sopenharmony_ci                      GENX(SF_CLIP_VIEWPORT_length), 64, &sf_cl_vp_address);
5953bf215546Sopenharmony_ci#else
5954bf215546Sopenharmony_ci      uint32_t *vp_map =
5955bf215546Sopenharmony_ci         stream_state(batch,
5956bf215546Sopenharmony_ci                      4 * ice->state.num_viewports * GENX(SF_VIEWPORT_length),
5957bf215546Sopenharmony_ci                      32, &ice->state.sf_vp_address);
5958bf215546Sopenharmony_ci      uint32_t *clip_map =
5959bf215546Sopenharmony_ci         stream_state(batch,
5960bf215546Sopenharmony_ci                      4 * ice->state.num_viewports * GENX(CLIP_VIEWPORT_length),
5961bf215546Sopenharmony_ci                      32, &ice->state.clip_vp_address);
5962bf215546Sopenharmony_ci#endif
5963bf215546Sopenharmony_ci
5964bf215546Sopenharmony_ci      for (unsigned i = 0; i < ice->state.num_viewports; i++) {
5965bf215546Sopenharmony_ci         const struct pipe_viewport_state *state = &ice->state.viewports[i];
5966bf215546Sopenharmony_ci         float gb_xmin, gb_xmax, gb_ymin, gb_ymax;
5967bf215546Sopenharmony_ci
5968bf215546Sopenharmony_ci#if GFX_VER == 8
5969bf215546Sopenharmony_ci         float vp_xmin = viewport_extent(state, 0, -1.0f);
5970bf215546Sopenharmony_ci         float vp_xmax = viewport_extent(state, 0,  1.0f);
5971bf215546Sopenharmony_ci         float vp_ymin = viewport_extent(state, 1, -1.0f);
5972bf215546Sopenharmony_ci         float vp_ymax = viewport_extent(state, 1,  1.0f);
5973bf215546Sopenharmony_ci#endif
5974bf215546Sopenharmony_ci         intel_calculate_guardband_size(0, cso_fb->width, 0, cso_fb->height,
5975bf215546Sopenharmony_ci                                        state->scale[0], state->scale[1],
5976bf215546Sopenharmony_ci                                        state->translate[0], state->translate[1],
5977bf215546Sopenharmony_ci                                        &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax);
5978bf215546Sopenharmony_ci#if GFX_VER >= 7
5979bf215546Sopenharmony_ci         crocus_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp)
5980bf215546Sopenharmony_ci#else
5981bf215546Sopenharmony_ci         crocus_pack_state(GENX(SF_VIEWPORT), vp_map, vp)
5982bf215546Sopenharmony_ci#endif
5983bf215546Sopenharmony_ci         {
5984bf215546Sopenharmony_ci            vp.ViewportMatrixElementm00 = state->scale[0];
5985bf215546Sopenharmony_ci            vp.ViewportMatrixElementm11 = state->scale[1];
5986bf215546Sopenharmony_ci            vp.ViewportMatrixElementm22 = state->scale[2];
5987bf215546Sopenharmony_ci            vp.ViewportMatrixElementm30 = state->translate[0];
5988bf215546Sopenharmony_ci            vp.ViewportMatrixElementm31 = state->translate[1];
5989bf215546Sopenharmony_ci            vp.ViewportMatrixElementm32 = state->translate[2];
5990bf215546Sopenharmony_ci#if GFX_VER < 6
5991bf215546Sopenharmony_ci            struct pipe_scissor_state scissor;
5992bf215546Sopenharmony_ci            crocus_fill_scissor_rect(ice, 0, &scissor);
5993bf215546Sopenharmony_ci            vp.ScissorRectangle.ScissorRectangleXMin = scissor.minx;
5994bf215546Sopenharmony_ci            vp.ScissorRectangle.ScissorRectangleXMax = scissor.maxx;
5995bf215546Sopenharmony_ci            vp.ScissorRectangle.ScissorRectangleYMin = scissor.miny;
5996bf215546Sopenharmony_ci            vp.ScissorRectangle.ScissorRectangleYMax = scissor.maxy;
5997bf215546Sopenharmony_ci#endif
5998bf215546Sopenharmony_ci
5999bf215546Sopenharmony_ci#if GFX_VER >= 7
6000bf215546Sopenharmony_ci            vp.XMinClipGuardband = gb_xmin;
6001bf215546Sopenharmony_ci            vp.XMaxClipGuardband = gb_xmax;
6002bf215546Sopenharmony_ci            vp.YMinClipGuardband = gb_ymin;
6003bf215546Sopenharmony_ci            vp.YMaxClipGuardband = gb_ymax;
6004bf215546Sopenharmony_ci#endif
6005bf215546Sopenharmony_ci#if GFX_VER == 8
6006bf215546Sopenharmony_ci            vp.XMinViewPort = MAX2(vp_xmin, 0);
6007bf215546Sopenharmony_ci            vp.XMaxViewPort = MIN2(vp_xmax, cso_fb->width) - 1;
6008bf215546Sopenharmony_ci            vp.YMinViewPort = MAX2(vp_ymin, 0);
6009bf215546Sopenharmony_ci            vp.YMaxViewPort = MIN2(vp_ymax, cso_fb->height) - 1;
6010bf215546Sopenharmony_ci#endif
6011bf215546Sopenharmony_ci         }
6012bf215546Sopenharmony_ci#if GFX_VER < 7
6013bf215546Sopenharmony_ci         crocus_pack_state(GENX(CLIP_VIEWPORT), clip_map, clip) {
6014bf215546Sopenharmony_ci            clip.XMinClipGuardband = gb_xmin;
6015bf215546Sopenharmony_ci            clip.XMaxClipGuardband = gb_xmax;
6016bf215546Sopenharmony_ci            clip.YMinClipGuardband = gb_ymin;
6017bf215546Sopenharmony_ci            clip.YMaxClipGuardband = gb_ymax;
6018bf215546Sopenharmony_ci         }
6019bf215546Sopenharmony_ci#endif
6020bf215546Sopenharmony_ci#if GFX_VER >= 7
6021bf215546Sopenharmony_ci         vp_map += GENX(SF_CLIP_VIEWPORT_length);
6022bf215546Sopenharmony_ci#else
6023bf215546Sopenharmony_ci         vp_map += GENX(SF_VIEWPORT_length);
6024bf215546Sopenharmony_ci         clip_map += GENX(CLIP_VIEWPORT_length);
6025bf215546Sopenharmony_ci#endif
6026bf215546Sopenharmony_ci      }
6027bf215546Sopenharmony_ci#if GFX_VER >= 7
6028bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
6029bf215546Sopenharmony_ci         ptr.SFClipViewportPointer = sf_cl_vp_address;
6030bf215546Sopenharmony_ci      }
6031bf215546Sopenharmony_ci#elif GFX_VER == 6
6032bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
6033bf215546Sopenharmony_ci         vp.SFViewportStateChange = 1;
6034bf215546Sopenharmony_ci         vp.CLIPViewportStateChange = 1;
6035bf215546Sopenharmony_ci         vp.PointertoCLIP_VIEWPORT = ice->state.clip_vp_address;
6036bf215546Sopenharmony_ci         vp.PointertoSF_VIEWPORT = ice->state.sf_vp_address;
6037bf215546Sopenharmony_ci      }
6038bf215546Sopenharmony_ci#endif
6039bf215546Sopenharmony_ci   }
6040bf215546Sopenharmony_ci
6041bf215546Sopenharmony_ci#if GFX_VER >= 6
6042bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN6_URB) {
6043bf215546Sopenharmony_ci#if GFX_VER == 6
6044bf215546Sopenharmony_ci      bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL
6045bf215546Sopenharmony_ci         || ice->shaders.ff_gs_prog;
6046bf215546Sopenharmony_ci
6047bf215546Sopenharmony_ci      struct brw_vue_prog_data *vue_prog_data =
6048bf215546Sopenharmony_ci         (void *) ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
6049bf215546Sopenharmony_ci      const unsigned vs_size = vue_prog_data->urb_entry_size;
6050bf215546Sopenharmony_ci      unsigned gs_size = vs_size;
6051bf215546Sopenharmony_ci      if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
6052bf215546Sopenharmony_ci         struct brw_vue_prog_data *gs_vue_prog_data =
6053bf215546Sopenharmony_ci            (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
6054bf215546Sopenharmony_ci         gs_size = gs_vue_prog_data->urb_entry_size;
6055bf215546Sopenharmony_ci      }
6056bf215546Sopenharmony_ci
6057bf215546Sopenharmony_ci      genX(crocus_upload_urb)(batch, vs_size, gs_present, gs_size);
6058bf215546Sopenharmony_ci#endif
6059bf215546Sopenharmony_ci#if GFX_VER >= 7
6060bf215546Sopenharmony_ci      const struct intel_device_info *devinfo = &batch->screen->devinfo;
6061bf215546Sopenharmony_ci      bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL;
6062bf215546Sopenharmony_ci      bool tess_present = ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL;
6063bf215546Sopenharmony_ci      unsigned entry_size[4];
6064bf215546Sopenharmony_ci
6065bf215546Sopenharmony_ci      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
6066bf215546Sopenharmony_ci         if (!ice->shaders.prog[i]) {
6067bf215546Sopenharmony_ci            entry_size[i] = 1;
6068bf215546Sopenharmony_ci         } else {
6069bf215546Sopenharmony_ci            struct brw_vue_prog_data *vue_prog_data =
6070bf215546Sopenharmony_ci               (void *) ice->shaders.prog[i]->prog_data;
6071bf215546Sopenharmony_ci            entry_size[i] = vue_prog_data->urb_entry_size;
6072bf215546Sopenharmony_ci         }
6073bf215546Sopenharmony_ci         assert(entry_size[i] != 0);
6074bf215546Sopenharmony_ci      }
6075bf215546Sopenharmony_ci
6076bf215546Sopenharmony_ci      /* If we're just switching between programs with the same URB requirements,
6077bf215546Sopenharmony_ci       * skip the rest of the logic.
6078bf215546Sopenharmony_ci       */
6079bf215546Sopenharmony_ci      bool no_change = false;
6080bf215546Sopenharmony_ci      if (ice->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&
6081bf215546Sopenharmony_ci          ice->urb.gs_present == gs_present &&
6082bf215546Sopenharmony_ci          ice->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&
6083bf215546Sopenharmony_ci          ice->urb.tess_present == tess_present &&
6084bf215546Sopenharmony_ci          ice->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&
6085bf215546Sopenharmony_ci          ice->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {
6086bf215546Sopenharmony_ci         no_change = true;
6087bf215546Sopenharmony_ci      }
6088bf215546Sopenharmony_ci
6089bf215546Sopenharmony_ci      if (!no_change) {
6090bf215546Sopenharmony_ci         ice->urb.vsize = entry_size[MESA_SHADER_VERTEX];
6091bf215546Sopenharmony_ci         ice->urb.gs_present = gs_present;
6092bf215546Sopenharmony_ci         ice->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];
6093bf215546Sopenharmony_ci         ice->urb.tess_present = tess_present;
6094bf215546Sopenharmony_ci         ice->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];
6095bf215546Sopenharmony_ci         ice->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];
6096bf215546Sopenharmony_ci
6097bf215546Sopenharmony_ci         unsigned entries[4];
6098bf215546Sopenharmony_ci         unsigned start[4];
6099bf215546Sopenharmony_ci         bool constrained;
6100bf215546Sopenharmony_ci         intel_get_urb_config(devinfo,
6101bf215546Sopenharmony_ci                              batch->screen->l3_config_3d,
6102bf215546Sopenharmony_ci                              tess_present,
6103bf215546Sopenharmony_ci                              gs_present,
6104bf215546Sopenharmony_ci                              entry_size,
6105bf215546Sopenharmony_ci                              entries, start, NULL, &constrained);
6106bf215546Sopenharmony_ci
6107bf215546Sopenharmony_ci#if GFX_VER == 7
6108bf215546Sopenharmony_ci         if (devinfo->platform == INTEL_PLATFORM_IVB)
6109bf215546Sopenharmony_ci            gen7_emit_vs_workaround_flush(batch);
6110bf215546Sopenharmony_ci#endif
6111bf215546Sopenharmony_ci         for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
6112bf215546Sopenharmony_ci            crocus_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
6113bf215546Sopenharmony_ci               urb._3DCommandSubOpcode += i;
6114bf215546Sopenharmony_ci               urb.VSURBStartingAddress     = start[i];
6115bf215546Sopenharmony_ci               urb.VSURBEntryAllocationSize = entry_size[i] - 1;
6116bf215546Sopenharmony_ci               urb.VSNumberofURBEntries     = entries[i];
6117bf215546Sopenharmony_ci            }
6118bf215546Sopenharmony_ci         }
6119bf215546Sopenharmony_ci      }
6120bf215546Sopenharmony_ci#endif
6121bf215546Sopenharmony_ci   }
6122bf215546Sopenharmony_ci
6123bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN6_BLEND_STATE) {
6124bf215546Sopenharmony_ci      struct crocus_blend_state *cso_blend = ice->state.cso_blend;
6125bf215546Sopenharmony_ci      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
6126bf215546Sopenharmony_ci      struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
6127bf215546Sopenharmony_ci
6128bf215546Sopenharmony_ci      STATIC_ASSERT(GENX(BLEND_STATE_ENTRY_length) == 2);
6129bf215546Sopenharmony_ci      int rt_dwords =
6130bf215546Sopenharmony_ci         MAX2(cso_fb->nr_cbufs, 1) * GENX(BLEND_STATE_ENTRY_length);
6131bf215546Sopenharmony_ci#if GFX_VER >= 8
6132bf215546Sopenharmony_ci      rt_dwords += GENX(BLEND_STATE_length);
6133bf215546Sopenharmony_ci#endif
6134bf215546Sopenharmony_ci      uint32_t blend_offset;
6135bf215546Sopenharmony_ci      uint32_t *blend_map =
6136bf215546Sopenharmony_ci         stream_state(batch,
6137bf215546Sopenharmony_ci                      4 * rt_dwords, 64, &blend_offset);
6138bf215546Sopenharmony_ci
6139bf215546Sopenharmony_ci#if GFX_VER >= 8
6140bf215546Sopenharmony_ci   struct GENX(BLEND_STATE) be = { 0 };
6141bf215546Sopenharmony_ci   {
6142bf215546Sopenharmony_ci#else
6143bf215546Sopenharmony_ci   for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
6144bf215546Sopenharmony_ci      struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
6145bf215546Sopenharmony_ci#define be entry
6146bf215546Sopenharmony_ci#endif
6147bf215546Sopenharmony_ci
6148bf215546Sopenharmony_ci      be.AlphaTestEnable = cso_zsa->cso.alpha_enabled;
6149bf215546Sopenharmony_ci      be.AlphaTestFunction = translate_compare_func(cso_zsa->cso.alpha_func);
6150bf215546Sopenharmony_ci      be.AlphaToCoverageEnable = cso_blend->cso.alpha_to_coverage;
6151bf215546Sopenharmony_ci      be.AlphaToOneEnable = cso_blend->cso.alpha_to_one;
6152bf215546Sopenharmony_ci      be.AlphaToCoverageDitherEnable = GFX_VER >= 7 && cso_blend->cso.alpha_to_coverage;
6153bf215546Sopenharmony_ci      be.ColorDitherEnable = cso_blend->cso.dither;
6154bf215546Sopenharmony_ci
6155bf215546Sopenharmony_ci#if GFX_VER >= 8
6156bf215546Sopenharmony_ci      for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
6157bf215546Sopenharmony_ci         struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
6158bf215546Sopenharmony_ci#else
6159bf215546Sopenharmony_ci      {
6160bf215546Sopenharmony_ci#endif
6161bf215546Sopenharmony_ci         const struct pipe_rt_blend_state *rt =
6162bf215546Sopenharmony_ci            &cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? i : 0];
6163bf215546Sopenharmony_ci
6164bf215546Sopenharmony_ci         be.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &entry, cso_blend, i) ||
6165bf215546Sopenharmony_ci            be.IndependentAlphaBlendEnable;
6166bf215546Sopenharmony_ci
6167bf215546Sopenharmony_ci         if (GFX_VER >= 8 || can_emit_logic_op(ice)) {
6168bf215546Sopenharmony_ci            entry.LogicOpEnable = cso_blend->cso.logicop_enable;
6169bf215546Sopenharmony_ci            entry.LogicOpFunction = cso_blend->cso.logicop_func;
6170bf215546Sopenharmony_ci         }
6171bf215546Sopenharmony_ci
6172bf215546Sopenharmony_ci         entry.ColorClampRange = COLORCLAMP_RTFORMAT;
6173bf215546Sopenharmony_ci         entry.PreBlendColorClampEnable = true;
6174bf215546Sopenharmony_ci         entry.PostBlendColorClampEnable = true;
6175bf215546Sopenharmony_ci
6176bf215546Sopenharmony_ci         entry.WriteDisableRed   = !(rt->colormask & PIPE_MASK_R);
6177bf215546Sopenharmony_ci         entry.WriteDisableGreen = !(rt->colormask & PIPE_MASK_G);
6178bf215546Sopenharmony_ci         entry.WriteDisableBlue  = !(rt->colormask & PIPE_MASK_B);
6179bf215546Sopenharmony_ci         entry.WriteDisableAlpha = !(rt->colormask & PIPE_MASK_A);
6180bf215546Sopenharmony_ci
6181bf215546Sopenharmony_ci#if GFX_VER >= 8
6182bf215546Sopenharmony_ci         GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
6183bf215546Sopenharmony_ci#else
6184bf215546Sopenharmony_ci         GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry);
6185bf215546Sopenharmony_ci#endif
6186bf215546Sopenharmony_ci      }
6187bf215546Sopenharmony_ci   }
6188bf215546Sopenharmony_ci#if GFX_VER >= 8
6189bf215546Sopenharmony_ci   GENX(BLEND_STATE_pack)(NULL, blend_map, &be);
6190bf215546Sopenharmony_ci#endif
6191bf215546Sopenharmony_ci#if GFX_VER < 7
6192bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
6193bf215546Sopenharmony_ci         ptr.PointertoBLEND_STATE = blend_offset;
6194bf215546Sopenharmony_ci         ptr.BLEND_STATEChange = true;
6195bf215546Sopenharmony_ci      }
6196bf215546Sopenharmony_ci#else
6197bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
6198bf215546Sopenharmony_ci         ptr.BlendStatePointer = blend_offset;
6199bf215546Sopenharmony_ci#if GFX_VER >= 8
6200bf215546Sopenharmony_ci         ptr.BlendStatePointerValid = true;
6201bf215546Sopenharmony_ci#endif
6202bf215546Sopenharmony_ci      }
6203bf215546Sopenharmony_ci#endif
6204bf215546Sopenharmony_ci   }
6205bf215546Sopenharmony_ci#endif
6206bf215546Sopenharmony_ci
6207bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_COLOR_CALC_STATE) {
6208bf215546Sopenharmony_ci      struct crocus_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
6209bf215546Sopenharmony_ci      UNUSED struct crocus_blend_state *cso_blend = ice->state.cso_blend;
6210bf215546Sopenharmony_ci      struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
6211bf215546Sopenharmony_ci      uint32_t cc_offset;
6212bf215546Sopenharmony_ci      void *cc_map =
6213bf215546Sopenharmony_ci         stream_state(batch,
6214bf215546Sopenharmony_ci                      sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
6215bf215546Sopenharmony_ci                      64, &cc_offset);
6216bf215546Sopenharmony_ci#if GFX_VER <= 5
6217bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6218bf215546Sopenharmony_ci#endif
6219bf215546Sopenharmony_ci      _crocus_pack_state(batch, GENX(COLOR_CALC_STATE), cc_map, cc) {
6220bf215546Sopenharmony_ci         cc.AlphaTestFormat = ALPHATEST_FLOAT32;
6221bf215546Sopenharmony_ci         cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value;
6222bf215546Sopenharmony_ci
6223bf215546Sopenharmony_ci#if GFX_VER <= 5
6224bf215546Sopenharmony_ci
6225bf215546Sopenharmony_ci         set_depth_stencil_bits(ice, &cc);
6226bf215546Sopenharmony_ci
6227bf215546Sopenharmony_ci         if (cso_blend->cso.logicop_enable) {
6228bf215546Sopenharmony_ci            if (can_emit_logic_op(ice)) {
6229bf215546Sopenharmony_ci               cc.LogicOpEnable = cso_blend->cso.logicop_enable;
6230bf215546Sopenharmony_ci               cc.LogicOpFunction = cso_blend->cso.logicop_func;
6231bf215546Sopenharmony_ci            }
6232bf215546Sopenharmony_ci         }
6233bf215546Sopenharmony_ci         cc.ColorDitherEnable = cso_blend->cso.dither;
6234bf215546Sopenharmony_ci
6235bf215546Sopenharmony_ci         cc.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &cc, cso_blend, 0);
6236bf215546Sopenharmony_ci
6237bf215546Sopenharmony_ci         if (cso->cso.alpha_enabled && ice->state.framebuffer.nr_cbufs <= 1) {
6238bf215546Sopenharmony_ci            cc.AlphaTestEnable = cso->cso.alpha_enabled;
6239bf215546Sopenharmony_ci            cc.AlphaTestFunction = translate_compare_func(cso->cso.alpha_func);
6240bf215546Sopenharmony_ci         }
6241bf215546Sopenharmony_ci         cc.StatisticsEnable = ice->state.stats_wm ? 1 : 0;
6242bf215546Sopenharmony_ci         cc.CCViewportStatePointer = ro_bo(batch->state.bo, ice->state.cc_vp_address);
6243bf215546Sopenharmony_ci#else
6244bf215546Sopenharmony_ci         cc.AlphaTestFormat = ALPHATEST_FLOAT32;
6245bf215546Sopenharmony_ci         cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value;
6246bf215546Sopenharmony_ci
6247bf215546Sopenharmony_ci         cc.BlendConstantColorRed   = ice->state.blend_color.color[0];
6248bf215546Sopenharmony_ci         cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
6249bf215546Sopenharmony_ci         cc.BlendConstantColorBlue  = ice->state.blend_color.color[2];
6250bf215546Sopenharmony_ci         cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
6251bf215546Sopenharmony_ci#endif
6252bf215546Sopenharmony_ci         cc.StencilReferenceValue = p_stencil_refs->ref_value[0];
6253bf215546Sopenharmony_ci         cc.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
6254bf215546Sopenharmony_ci      }
6255bf215546Sopenharmony_ci      ice->shaders.cc_offset = cc_offset;
6256bf215546Sopenharmony_ci#if GFX_VER >= 6
6257bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
6258bf215546Sopenharmony_ci         ptr.ColorCalcStatePointer = cc_offset;
6259bf215546Sopenharmony_ci#if GFX_VER != 7
6260bf215546Sopenharmony_ci         ptr.ColorCalcStatePointerValid = true;
6261bf215546Sopenharmony_ci#endif
6262bf215546Sopenharmony_ci      }
6263bf215546Sopenharmony_ci#endif
6264bf215546Sopenharmony_ci   }
6265bf215546Sopenharmony_ci#if GFX_VER <= 5
6266bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN4_CONSTANT_COLOR) {
6267bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) {
6268bf215546Sopenharmony_ci         blend_cc.BlendConstantColorRed = ice->state.blend_color.color[0];
6269bf215546Sopenharmony_ci         blend_cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
6270bf215546Sopenharmony_ci         blend_cc.BlendConstantColorBlue = ice->state.blend_color.color[2];
6271bf215546Sopenharmony_ci         blend_cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
6272bf215546Sopenharmony_ci      }
6273bf215546Sopenharmony_ci   }
6274bf215546Sopenharmony_ci#endif
6275bf215546Sopenharmony_ci   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
6276bf215546Sopenharmony_ci      if (!(stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage)))
6277bf215546Sopenharmony_ci         continue;
6278bf215546Sopenharmony_ci
6279bf215546Sopenharmony_ci      struct crocus_shader_state *shs = &ice->state.shaders[stage];
6280bf215546Sopenharmony_ci      struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
6281bf215546Sopenharmony_ci
6282bf215546Sopenharmony_ci      if (!shader)
6283bf215546Sopenharmony_ci         continue;
6284bf215546Sopenharmony_ci
6285bf215546Sopenharmony_ci      if (shs->sysvals_need_upload)
6286bf215546Sopenharmony_ci         upload_sysvals(ice, stage);
6287bf215546Sopenharmony_ci
6288bf215546Sopenharmony_ci#if GFX_VER <= 5
6289bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN4_CURBE;
6290bf215546Sopenharmony_ci#endif
6291bf215546Sopenharmony_ci#if GFX_VER >= 7
6292bf215546Sopenharmony_ci      struct push_bos push_bos = {};
6293bf215546Sopenharmony_ci      setup_constant_buffers(ice, batch, stage, &push_bos);
6294bf215546Sopenharmony_ci
6295bf215546Sopenharmony_ci      emit_push_constant_packets(ice, batch, stage, &push_bos);
6296bf215546Sopenharmony_ci#endif
6297bf215546Sopenharmony_ci   }
6298bf215546Sopenharmony_ci
6299bf215546Sopenharmony_ci   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
6300bf215546Sopenharmony_ci      if (stage_dirty & (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage)) {
6301bf215546Sopenharmony_ci         if (ice->shaders.prog[stage]) {
6302bf215546Sopenharmony_ci#if GFX_VER <= 6
6303bf215546Sopenharmony_ci            dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;
6304bf215546Sopenharmony_ci#endif
6305bf215546Sopenharmony_ci            crocus_populate_binding_table(ice, batch, stage, false);
6306bf215546Sopenharmony_ci            ice->shaders.prog[stage]->bind_bo_offset =
6307bf215546Sopenharmony_ci               crocus_upload_binding_table(ice, batch,
6308bf215546Sopenharmony_ci                                           ice->shaders.prog[stage]->surf_offset,
6309bf215546Sopenharmony_ci                                           ice->shaders.prog[stage]->bt.size_bytes);
6310bf215546Sopenharmony_ci
6311bf215546Sopenharmony_ci#if GFX_VER >= 7
6312bf215546Sopenharmony_ci            crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
6313bf215546Sopenharmony_ci               ptr._3DCommandSubOpcode = 38 + stage;
6314bf215546Sopenharmony_ci               ptr.PointertoVSBindingTable = ice->shaders.prog[stage]->bind_bo_offset;
6315bf215546Sopenharmony_ci            }
6316bf215546Sopenharmony_ci#endif
6317bf215546Sopenharmony_ci#if GFX_VER == 6
6318bf215546Sopenharmony_ci         } else if (stage == MESA_SHADER_GEOMETRY && ice->shaders.ff_gs_prog) {
6319bf215546Sopenharmony_ci            dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;
6320bf215546Sopenharmony_ci            crocus_populate_binding_table(ice, batch, stage, true);
6321bf215546Sopenharmony_ci            ice->shaders.ff_gs_prog->bind_bo_offset =
6322bf215546Sopenharmony_ci               crocus_upload_binding_table(ice, batch,
6323bf215546Sopenharmony_ci                                           ice->shaders.ff_gs_prog->surf_offset,
6324bf215546Sopenharmony_ci                                           ice->shaders.ff_gs_prog->bt.size_bytes);
6325bf215546Sopenharmony_ci#endif
6326bf215546Sopenharmony_ci         }
6327bf215546Sopenharmony_ci      }
6328bf215546Sopenharmony_ci   }
6329bf215546Sopenharmony_ci#if GFX_VER <= 6
6330bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS) {
6331bf215546Sopenharmony_ci      struct crocus_compiled_shader *gs = ice->shaders.prog[MESA_SHADER_GEOMETRY];
6332bf215546Sopenharmony_ci      if (gs == NULL)
6333bf215546Sopenharmony_ci         gs = ice->shaders.ff_gs_prog;
6334bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), ptr) {
6335bf215546Sopenharmony_ci         ptr.PointertoVSBindingTable = ice->shaders.prog[MESA_SHADER_VERTEX]->bind_bo_offset;
6336bf215546Sopenharmony_ci         ptr.PointertoPSBindingTable = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bind_bo_offset;
6337bf215546Sopenharmony_ci#if GFX_VER == 6
6338bf215546Sopenharmony_ci         ptr.VSBindingTableChange = true;
6339bf215546Sopenharmony_ci         ptr.PSBindingTableChange = true;
6340bf215546Sopenharmony_ci         ptr.GSBindingTableChange = gs ? true : false;
6341bf215546Sopenharmony_ci         ptr.PointertoGSBindingTable = gs ? gs->bind_bo_offset : 0;
6342bf215546Sopenharmony_ci#endif
6343bf215546Sopenharmony_ci      }
6344bf215546Sopenharmony_ci   }
6345bf215546Sopenharmony_ci#endif
6346bf215546Sopenharmony_ci
6347bf215546Sopenharmony_ci   bool sampler_updates = dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS;
6348bf215546Sopenharmony_ci   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
6349bf215546Sopenharmony_ci      if (!(stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage)) ||
6350bf215546Sopenharmony_ci          !ice->shaders.prog[stage])
6351bf215546Sopenharmony_ci         continue;
6352bf215546Sopenharmony_ci
6353bf215546Sopenharmony_ci      crocus_upload_sampler_states(ice, batch, stage);
6354bf215546Sopenharmony_ci
6355bf215546Sopenharmony_ci      sampler_updates = true;
6356bf215546Sopenharmony_ci
6357bf215546Sopenharmony_ci#if GFX_VER >= 7
6358bf215546Sopenharmony_ci      struct crocus_shader_state *shs = &ice->state.shaders[stage];
6359bf215546Sopenharmony_ci
6360bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
6361bf215546Sopenharmony_ci         ptr._3DCommandSubOpcode = 43 + stage;
6362bf215546Sopenharmony_ci         ptr.PointertoVSSamplerState = shs->sampler_offset;
6363bf215546Sopenharmony_ci      }
6364bf215546Sopenharmony_ci#endif
6365bf215546Sopenharmony_ci   }
6366bf215546Sopenharmony_ci
6367bf215546Sopenharmony_ci   if (sampler_updates) {
6368bf215546Sopenharmony_ci#if GFX_VER == 6
6369bf215546Sopenharmony_ci      struct crocus_shader_state *shs_vs = &ice->state.shaders[MESA_SHADER_VERTEX];
6370bf215546Sopenharmony_ci      struct crocus_shader_state *shs_gs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
6371bf215546Sopenharmony_ci      struct crocus_shader_state *shs_fs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
6372bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ptr) {
6373bf215546Sopenharmony_ci         if (ice->shaders.prog[MESA_SHADER_VERTEX] &&
6374bf215546Sopenharmony_ci             (dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||
6375bf215546Sopenharmony_ci              stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_VERTEX))) {
6376bf215546Sopenharmony_ci            ptr.VSSamplerStateChange = true;
6377bf215546Sopenharmony_ci            ptr.PointertoVSSamplerState = shs_vs->sampler_offset;
6378bf215546Sopenharmony_ci         }
6379bf215546Sopenharmony_ci         if (ice->shaders.prog[MESA_SHADER_GEOMETRY] &&
6380bf215546Sopenharmony_ci             (dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||
6381bf215546Sopenharmony_ci              stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_GEOMETRY))) {
6382bf215546Sopenharmony_ci            ptr.GSSamplerStateChange = true;
6383bf215546Sopenharmony_ci            ptr.PointertoGSSamplerState = shs_gs->sampler_offset;
6384bf215546Sopenharmony_ci         }
6385bf215546Sopenharmony_ci         if (ice->shaders.prog[MESA_SHADER_FRAGMENT] &&
6386bf215546Sopenharmony_ci             (dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||
6387bf215546Sopenharmony_ci              stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_FRAGMENT))) {
6388bf215546Sopenharmony_ci            ptr.PSSamplerStateChange = true;
6389bf215546Sopenharmony_ci            ptr.PointertoPSSamplerState = shs_fs->sampler_offset;
6390bf215546Sopenharmony_ci         }
6391bf215546Sopenharmony_ci      }
6392bf215546Sopenharmony_ci#endif
6393bf215546Sopenharmony_ci   }
6394bf215546Sopenharmony_ci
6395bf215546Sopenharmony_ci#if GFX_VER >= 6
6396bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN6_MULTISAMPLE) {
6397bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
6398bf215546Sopenharmony_ci         ms.PixelLocation =
6399bf215546Sopenharmony_ci            ice->state.cso_rast->cso.half_pixel_center ? CENTER : UL_CORNER;
6400bf215546Sopenharmony_ci         if (ice->state.framebuffer.samples > 0)
6401bf215546Sopenharmony_ci            ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;
6402bf215546Sopenharmony_ci#if GFX_VER == 6
6403bf215546Sopenharmony_ci         INTEL_SAMPLE_POS_4X(ms.Sample);
6404bf215546Sopenharmony_ci#elif GFX_VER == 7
6405bf215546Sopenharmony_ci         switch (ice->state.framebuffer.samples) {
6406bf215546Sopenharmony_ci         case 1:
6407bf215546Sopenharmony_ci            INTEL_SAMPLE_POS_1X(ms.Sample);
6408bf215546Sopenharmony_ci            break;
6409bf215546Sopenharmony_ci         case 2:
6410bf215546Sopenharmony_ci            INTEL_SAMPLE_POS_2X(ms.Sample);
6411bf215546Sopenharmony_ci            break;
6412bf215546Sopenharmony_ci         case 4:
6413bf215546Sopenharmony_ci            INTEL_SAMPLE_POS_4X(ms.Sample);
6414bf215546Sopenharmony_ci            break;
6415bf215546Sopenharmony_ci         case 8:
6416bf215546Sopenharmony_ci            INTEL_SAMPLE_POS_8X(ms.Sample);
6417bf215546Sopenharmony_ci            break;
6418bf215546Sopenharmony_ci         default:
6419bf215546Sopenharmony_ci            break;
6420bf215546Sopenharmony_ci         }
6421bf215546Sopenharmony_ci#endif
6422bf215546Sopenharmony_ci      }
6423bf215546Sopenharmony_ci   }
6424bf215546Sopenharmony_ci
6425bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN6_SAMPLE_MASK) {
6426bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {
6427bf215546Sopenharmony_ci         ms.SampleMask = determine_sample_mask(ice);
6428bf215546Sopenharmony_ci      }
6429bf215546Sopenharmony_ci   }
6430bf215546Sopenharmony_ci#endif
6431bf215546Sopenharmony_ci
6432bf215546Sopenharmony_ci#if GFX_VER >= 7
6433bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
6434bf215546Sopenharmony_ci   if ((stage_dirty & CROCUS_STAGE_DIRTY_FS) && shader) {
6435bf215546Sopenharmony_ci      struct brw_stage_prog_data *prog_data = shader->prog_data;
6436bf215546Sopenharmony_ci      struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
6437bf215546Sopenharmony_ci
6438bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
6439bf215546Sopenharmony_ci
6440bf215546Sopenharmony_ci         /* Initialize the execution mask with VMask.  Otherwise, derivatives are
6441bf215546Sopenharmony_ci          * incorrect for subspans where some of the pixels are unlit.  We believe
6442bf215546Sopenharmony_ci          * the bit just didn't take effect in previous generations.
6443bf215546Sopenharmony_ci          */
6444bf215546Sopenharmony_ci         ps.VectorMaskEnable = GFX_VER >= 8 && wm_prog_data->uses_vmask;
6445bf215546Sopenharmony_ci
6446bf215546Sopenharmony_ci         ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
6447bf215546Sopenharmony_ci         ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
6448bf215546Sopenharmony_ci         ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
6449bf215546Sopenharmony_ci
6450bf215546Sopenharmony_ci         ps.DispatchGRFStartRegisterForConstantSetupData0 =
6451bf215546Sopenharmony_ci            brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
6452bf215546Sopenharmony_ci         ps.DispatchGRFStartRegisterForConstantSetupData1 =
6453bf215546Sopenharmony_ci            brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
6454bf215546Sopenharmony_ci         ps.DispatchGRFStartRegisterForConstantSetupData2 =
6455bf215546Sopenharmony_ci            brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
6456bf215546Sopenharmony_ci
6457bf215546Sopenharmony_ci         ps.KernelStartPointer0 = KSP(ice, shader) +
6458bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
6459bf215546Sopenharmony_ci         ps.KernelStartPointer1 = KSP(ice, shader) +
6460bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
6461bf215546Sopenharmony_ci         ps.KernelStartPointer2 = KSP(ice, shader) +
6462bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
6463bf215546Sopenharmony_ci
6464bf215546Sopenharmony_ci#if GFX_VERx10 == 75
6465bf215546Sopenharmony_ci         ps.SampleMask = determine_sample_mask(ice);
6466bf215546Sopenharmony_ci#endif
6467bf215546Sopenharmony_ci         // XXX: WABTPPrefetchDisable, see above, drop at C0
6468bf215546Sopenharmony_ci         ps.BindingTableEntryCount = shader->bt.size_bytes / 4;
6469bf215546Sopenharmony_ci         ps.FloatingPointMode = prog_data->use_alt_mode;
6470bf215546Sopenharmony_ci#if GFX_VER >= 8
6471bf215546Sopenharmony_ci         ps.MaximumNumberofThreadsPerPSD =
6472bf215546Sopenharmony_ci            batch->screen->devinfo.max_threads_per_psd - 2;
6473bf215546Sopenharmony_ci#else
6474bf215546Sopenharmony_ci         ps.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1;
6475bf215546Sopenharmony_ci#endif
6476bf215546Sopenharmony_ci
6477bf215546Sopenharmony_ci         ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0;
6478bf215546Sopenharmony_ci
6479bf215546Sopenharmony_ci#if GFX_VER < 8
6480bf215546Sopenharmony_ci         ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
6481bf215546Sopenharmony_ci         ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend && ice->state.cso_blend->dual_color_blending;
6482bf215546Sopenharmony_ci         ps.AttributeEnable = (wm_prog_data->num_varying_inputs != 0);
6483bf215546Sopenharmony_ci#endif
6484bf215546Sopenharmony_ci         /* From the documentation for this packet:
6485bf215546Sopenharmony_ci          * "If the PS kernel does not need the Position XY Offsets to
6486bf215546Sopenharmony_ci          *  compute a Position Value, then this field should be programmed
6487bf215546Sopenharmony_ci          *  to POSOFFSET_NONE."
6488bf215546Sopenharmony_ci          *
6489bf215546Sopenharmony_ci          * "SW Recommendation: If the PS kernel needs the Position Offsets
6490bf215546Sopenharmony_ci          *  to compute a Position XY value, this field should match Position
6491bf215546Sopenharmony_ci          *  ZW Interpolation Mode to ensure a consistent position.xyzw
6492bf215546Sopenharmony_ci          *  computation."
6493bf215546Sopenharmony_ci          *
6494bf215546Sopenharmony_ci          * We only require XY sample offsets. So, this recommendation doesn't
6495bf215546Sopenharmony_ci          * look useful at the moment.  We might need this in future.
6496bf215546Sopenharmony_ci          */
6497bf215546Sopenharmony_ci         ps.PositionXYOffsetSelect =
6498bf215546Sopenharmony_ci            wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
6499bf215546Sopenharmony_ci
6500bf215546Sopenharmony_ci         if (wm_prog_data->base.total_scratch) {
6501bf215546Sopenharmony_ci            struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, MESA_SHADER_FRAGMENT);
6502bf215546Sopenharmony_ci            ps.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11;
6503bf215546Sopenharmony_ci            ps.ScratchSpaceBasePointer = rw_bo(bo, 0);
6504bf215546Sopenharmony_ci         }
6505bf215546Sopenharmony_ci      }
6506bf215546Sopenharmony_ci#if GFX_VER == 8
6507bf215546Sopenharmony_ci      const struct shader_info *fs_info =
6508bf215546Sopenharmony_ci         crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
6509bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
6510bf215546Sopenharmony_ci         psx.PixelShaderValid = true;
6511bf215546Sopenharmony_ci         psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
6512bf215546Sopenharmony_ci         psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
6513bf215546Sopenharmony_ci         psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
6514bf215546Sopenharmony_ci         psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
6515bf215546Sopenharmony_ci         psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
6516bf215546Sopenharmony_ci         psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
6517bf215546Sopenharmony_ci
6518bf215546Sopenharmony_ci         /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
6519bf215546Sopenharmony_ci         if (wm_prog_data->uses_sample_mask)
6520bf215546Sopenharmony_ci            psx.PixelShaderUsesInputCoverageMask = true;
6521bf215546Sopenharmony_ci
6522bf215546Sopenharmony_ci         psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
6523bf215546Sopenharmony_ci
6524bf215546Sopenharmony_ci         /* The stricter cross-primitive coherency guarantees that the hardware
6525bf215546Sopenharmony_ci          * gives us with the "Accesses UAV" bit set for at least one shader stage
6526bf215546Sopenharmony_ci          * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
6527bf215546Sopenharmony_ci          * are redundant within the current image, atomic counter and SSBO GL
6528bf215546Sopenharmony_ci          * APIs, which all have very loose ordering and coherency requirements
6529bf215546Sopenharmony_ci          * and generally rely on the application to insert explicit barriers when
6530bf215546Sopenharmony_ci          * a shader invocation is expected to see the memory writes performed by
6531bf215546Sopenharmony_ci          * the invocations of some previous primitive.  Regardless of the value
6532bf215546Sopenharmony_ci          * of "UAV coherency required", the "Accesses UAV" bits will implicitly
6533bf215546Sopenharmony_ci          * cause an in most cases useless DC flush when the lowermost stage with
6534bf215546Sopenharmony_ci          * the bit set finishes execution.
6535bf215546Sopenharmony_ci          *
6536bf215546Sopenharmony_ci          * It would be nice to disable it, but in some cases we can't because on
6537bf215546Sopenharmony_ci          * Gfx8+ it also has an influence on rasterization via the PS UAV-only
6538bf215546Sopenharmony_ci          * signal (which could be set independently from the coherency mechanism
6539bf215546Sopenharmony_ci          * in the 3DSTATE_WM command on Gfx7), and because in some cases it will
6540bf215546Sopenharmony_ci          * determine whether the hardware skips execution of the fragment shader
6541bf215546Sopenharmony_ci          * or not via the ThreadDispatchEnable signal.  However if we know that
6542bf215546Sopenharmony_ci          * GFX8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
6543bf215546Sopenharmony_ci          * GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
6544bf215546Sopenharmony_ci          * difference so we may just disable it here.
6545bf215546Sopenharmony_ci          *
6546bf215546Sopenharmony_ci          * Gfx8 hardware tries to compute ThreadDispatchEnable for us but doesn't
6547bf215546Sopenharmony_ci          * take into account KillPixels when no depth or stencil writes are
6548bf215546Sopenharmony_ci          * enabled.  In order for occlusion queries to work correctly with no
6549bf215546Sopenharmony_ci          * attachments, we need to force-enable here.
6550bf215546Sopenharmony_ci          *
6551bf215546Sopenharmony_ci          */
6552bf215546Sopenharmony_ci         if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
6553bf215546Sopenharmony_ci             !(has_writeable_rt(ice->state.cso_blend, fs_info)))
6554bf215546Sopenharmony_ci            psx.PixelShaderHasUAV = true;
6555bf215546Sopenharmony_ci      }
6556bf215546Sopenharmony_ci#endif
6557bf215546Sopenharmony_ci   }
6558bf215546Sopenharmony_ci#endif
6559bf215546Sopenharmony_ci
6560bf215546Sopenharmony_ci#if GFX_VER >= 7
6561bf215546Sopenharmony_ci   if (ice->state.streamout_active) {
6562bf215546Sopenharmony_ci      if (dirty & CROCUS_DIRTY_GEN7_SO_BUFFERS) {
6563bf215546Sopenharmony_ci         for (int i = 0; i < 4; i++) {
6564bf215546Sopenharmony_ci            struct crocus_stream_output_target *tgt =
6565bf215546Sopenharmony_ci               (void *) ice->state.so_target[i];
6566bf215546Sopenharmony_ci
6567bf215546Sopenharmony_ci            if (!tgt) {
6568bf215546Sopenharmony_ci               crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {
6569bf215546Sopenharmony_ci                  sob.SOBufferIndex = i;
6570bf215546Sopenharmony_ci                  sob.MOCS = crocus_mocs(NULL, &batch->screen->isl_dev);
6571bf215546Sopenharmony_ci               }
6572bf215546Sopenharmony_ci               continue;
6573bf215546Sopenharmony_ci            }
6574bf215546Sopenharmony_ci            struct crocus_resource *res = (void *) tgt->base.buffer;
6575bf215546Sopenharmony_ci            uint32_t start = tgt->base.buffer_offset;
6576bf215546Sopenharmony_ci#if GFX_VER < 8
6577bf215546Sopenharmony_ci            uint32_t end = ALIGN(start + tgt->base.buffer_size, 4);
6578bf215546Sopenharmony_ci#endif
6579bf215546Sopenharmony_ci            crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {
6580bf215546Sopenharmony_ci               sob.SOBufferIndex = i;
6581bf215546Sopenharmony_ci
6582bf215546Sopenharmony_ci               sob.SurfaceBaseAddress = rw_bo(res->bo, start);
6583bf215546Sopenharmony_ci               sob.MOCS = crocus_mocs(res->bo, &batch->screen->isl_dev);
6584bf215546Sopenharmony_ci#if GFX_VER < 8
6585bf215546Sopenharmony_ci               sob.SurfacePitch = tgt->stride;
6586bf215546Sopenharmony_ci               sob.SurfaceEndAddress = rw_bo(res->bo, end);
6587bf215546Sopenharmony_ci#else
6588bf215546Sopenharmony_ci               sob.SOBufferEnable = true;
6589bf215546Sopenharmony_ci               sob.StreamOffsetWriteEnable = true;
6590bf215546Sopenharmony_ci               sob.StreamOutputBufferOffsetAddressEnable = true;
6591bf215546Sopenharmony_ci
6592bf215546Sopenharmony_ci               sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1;
6593bf215546Sopenharmony_ci               sob.StreamOutputBufferOffsetAddress =
6594bf215546Sopenharmony_ci                  rw_bo(crocus_resource_bo(&tgt->offset_res->base.b), tgt->offset_offset);
6595bf215546Sopenharmony_ci               if (tgt->zero_offset) {
6596bf215546Sopenharmony_ci                  sob.StreamOffset = 0;
6597bf215546Sopenharmony_ci                  tgt->zero_offset = false;
6598bf215546Sopenharmony_ci               } else
6599bf215546Sopenharmony_ci                  sob.StreamOffset = 0xFFFFFFFF; /* not offset, see above */
6600bf215546Sopenharmony_ci#endif
6601bf215546Sopenharmony_ci            }
6602bf215546Sopenharmony_ci         }
6603bf215546Sopenharmony_ci      }
6604bf215546Sopenharmony_ci
6605bf215546Sopenharmony_ci      if ((dirty & CROCUS_DIRTY_SO_DECL_LIST) && ice->state.streamout) {
6606bf215546Sopenharmony_ci         uint32_t *decl_list =
6607bf215546Sopenharmony_ci            ice->state.streamout + GENX(3DSTATE_STREAMOUT_length);
6608bf215546Sopenharmony_ci         crocus_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2));
6609bf215546Sopenharmony_ci      }
6610bf215546Sopenharmony_ci
6611bf215546Sopenharmony_ci      if (dirty & CROCUS_DIRTY_STREAMOUT) {
6612bf215546Sopenharmony_ci         const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
6613bf215546Sopenharmony_ci
6614bf215546Sopenharmony_ci         uint32_t dynamic_sol[GENX(3DSTATE_STREAMOUT_length)];
6615bf215546Sopenharmony_ci         crocus_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) {
6616bf215546Sopenharmony_ci            sol.SOFunctionEnable = true;
6617bf215546Sopenharmony_ci            sol.SOStatisticsEnable = true;
6618bf215546Sopenharmony_ci
6619bf215546Sopenharmony_ci            sol.RenderingDisable = cso_rast->cso.rasterizer_discard &&
6620bf215546Sopenharmony_ci                                   !ice->state.prims_generated_query_active;
6621bf215546Sopenharmony_ci            sol.ReorderMode = cso_rast->cso.flatshade_first ? LEADING : TRAILING;
6622bf215546Sopenharmony_ci         }
6623bf215546Sopenharmony_ci
6624bf215546Sopenharmony_ci         assert(ice->state.streamout);
6625bf215546Sopenharmony_ci
6626bf215546Sopenharmony_ci         crocus_emit_merge(batch, ice->state.streamout, dynamic_sol,
6627bf215546Sopenharmony_ci                         GENX(3DSTATE_STREAMOUT_length));
6628bf215546Sopenharmony_ci      }
6629bf215546Sopenharmony_ci   } else {
6630bf215546Sopenharmony_ci      if (dirty & CROCUS_DIRTY_STREAMOUT) {
6631bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol);
6632bf215546Sopenharmony_ci      }
6633bf215546Sopenharmony_ci   }
6634bf215546Sopenharmony_ci#endif
6635bf215546Sopenharmony_ci#if GFX_VER == 6
6636bf215546Sopenharmony_ci   if (ice->state.streamout_active) {
6637bf215546Sopenharmony_ci      if (dirty & CROCUS_DIRTY_GEN6_SVBI) {
6638bf215546Sopenharmony_ci         crocus_emit_so_svbi(ice);
6639bf215546Sopenharmony_ci      }
6640bf215546Sopenharmony_ci   }
6641bf215546Sopenharmony_ci#endif
6642bf215546Sopenharmony_ci
6643bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_CLIP) {
6644bf215546Sopenharmony_ci#if GFX_VER < 6
6645bf215546Sopenharmony_ci      const struct brw_clip_prog_data *clip_prog_data = (struct brw_clip_prog_data *)ice->shaders.clip_prog->prog_data;
6646bf215546Sopenharmony_ci      struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;
6647bf215546Sopenharmony_ci
6648bf215546Sopenharmony_ci      uint32_t *clip_ptr = stream_state(batch, GENX(CLIP_STATE_length) * 4, 32, &ice->shaders.clip_offset);
6649bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6650bf215546Sopenharmony_ci      _crocus_pack_state(batch, GENX(CLIP_STATE), clip_ptr, clip) {
6651bf215546Sopenharmony_ci         clip.KernelStartPointer = KSP(ice, ice->shaders.clip_prog);
6652bf215546Sopenharmony_ci         clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
6653bf215546Sopenharmony_ci         clip.SingleProgramFlow = true;
6654bf215546Sopenharmony_ci         clip.GRFRegisterCount = DIV_ROUND_UP(clip_prog_data->total_grf, 16) - 1;
6655bf215546Sopenharmony_ci
6656bf215546Sopenharmony_ci         clip.VertexURBEntryReadLength = clip_prog_data->urb_read_length;
6657bf215546Sopenharmony_ci         clip.ConstantURBEntryReadLength = clip_prog_data->curb_read_length;
6658bf215546Sopenharmony_ci
6659bf215546Sopenharmony_ci         clip.DispatchGRFStartRegisterForURBData = 1;
6660bf215546Sopenharmony_ci         clip.VertexURBEntryReadOffset = 0;
6661bf215546Sopenharmony_ci         clip.ConstantURBEntryReadOffset = ice->curbe.clip_start * 2;
6662bf215546Sopenharmony_ci
6663bf215546Sopenharmony_ci         clip.NumberofURBEntries = batch->ice->urb.nr_clip_entries;
6664bf215546Sopenharmony_ci         clip.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
6665bf215546Sopenharmony_ci
6666bf215546Sopenharmony_ci         if (batch->ice->urb.nr_clip_entries >= 10) {
6667bf215546Sopenharmony_ci            /* Half of the URB entries go to each thread, and it has to be an
6668bf215546Sopenharmony_ci             * even number.
6669bf215546Sopenharmony_ci             */
6670bf215546Sopenharmony_ci            assert(batch->ice->urb.nr_clip_entries % 2 == 0);
6671bf215546Sopenharmony_ci
6672bf215546Sopenharmony_ci            /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
6673bf215546Sopenharmony_ci             * only 2 threads can output VUEs at a time.
6674bf215546Sopenharmony_ci             */
6675bf215546Sopenharmony_ci            clip.MaximumNumberofThreads = (GFX_VER == 5 ? 16 : 2) - 1;
6676bf215546Sopenharmony_ci         } else {
6677bf215546Sopenharmony_ci            assert(batch->ice->urb.nr_clip_entries >= 5);
6678bf215546Sopenharmony_ci            clip.MaximumNumberofThreads = 1 - 1;
6679bf215546Sopenharmony_ci         }
6680bf215546Sopenharmony_ci         clip.VertexPositionSpace = VPOS_NDCSPACE;
6681bf215546Sopenharmony_ci         clip.UserClipFlagsMustClipEnable = true;
6682bf215546Sopenharmony_ci         clip.GuardbandClipTestEnable = true;
6683bf215546Sopenharmony_ci
6684bf215546Sopenharmony_ci         clip.ClipperViewportStatePointer = ro_bo(batch->state.bo, ice->state.clip_vp_address);
6685bf215546Sopenharmony_ci         clip.ScreenSpaceViewportXMin = -1.0;
6686bf215546Sopenharmony_ci         clip.ScreenSpaceViewportXMax = 1.0;
6687bf215546Sopenharmony_ci         clip.ScreenSpaceViewportYMin = -1.0;
6688bf215546Sopenharmony_ci         clip.ScreenSpaceViewportYMax = 1.0;
6689bf215546Sopenharmony_ci         clip.ViewportXYClipTestEnable = true;
6690bf215546Sopenharmony_ci         clip.ViewportZClipTestEnable = (cso_state->depth_clip_near || cso_state->depth_clip_far);
6691bf215546Sopenharmony_ci
6692bf215546Sopenharmony_ci#if GFX_VER == 5 || GFX_VERx10 == 45
6693bf215546Sopenharmony_ci         clip.UserClipDistanceClipTestEnableBitmask = cso_state->clip_plane_enable;
6694bf215546Sopenharmony_ci#else
6695bf215546Sopenharmony_ci         /* Up to 6 actual clip flags, plus the 7th for the negative RHW
6696bf215546Sopenharmony_ci          * workaround.
6697bf215546Sopenharmony_ci          */
6698bf215546Sopenharmony_ci         clip.UserClipDistanceClipTestEnableBitmask = (cso_state->clip_plane_enable & 0x3f) | 0x40;
6699bf215546Sopenharmony_ci#endif
6700bf215546Sopenharmony_ci
6701bf215546Sopenharmony_ci         clip.APIMode = cso_state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
6702bf215546Sopenharmony_ci         clip.GuardbandClipTestEnable = true;
6703bf215546Sopenharmony_ci
6704bf215546Sopenharmony_ci         clip.ClipMode = clip_prog_data->clip_mode;
6705bf215546Sopenharmony_ci#if GFX_VERx10 == 45
6706bf215546Sopenharmony_ci         clip.NegativeWClipTestEnable = true;
6707bf215546Sopenharmony_ci#endif
6708bf215546Sopenharmony_ci      }
6709bf215546Sopenharmony_ci
6710bf215546Sopenharmony_ci#else //if GFX_VER >= 6
6711bf215546Sopenharmony_ci      struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;
6712bf215546Sopenharmony_ci      const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data );
6713bf215546Sopenharmony_ci      struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
6714bf215546Sopenharmony_ci      bool gs_or_tes = ice->shaders.prog[MESA_SHADER_GEOMETRY] ||
6715bf215546Sopenharmony_ci                       ice->shaders.prog[MESA_SHADER_TESS_EVAL];
6716bf215546Sopenharmony_ci      bool points_or_lines = cso_rast->fill_mode_point_or_line ||
6717bf215546Sopenharmony_ci         (gs_or_tes ? ice->shaders.output_topology_is_points_or_lines
6718bf215546Sopenharmony_ci                    : ice->state.prim_is_points_or_lines);
6719bf215546Sopenharmony_ci      uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];
6720bf215546Sopenharmony_ci      crocus_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {
6721bf215546Sopenharmony_ci         cl.StatisticsEnable = ice->state.statistics_counters_enabled;
6722bf215546Sopenharmony_ci         if (cso_rast->cso.rasterizer_discard)
6723bf215546Sopenharmony_ci            cl.ClipMode = CLIPMODE_REJECT_ALL;
6724bf215546Sopenharmony_ci         else if (ice->state.window_space_position)
6725bf215546Sopenharmony_ci            cl.ClipMode = CLIPMODE_ACCEPT_ALL;
6726bf215546Sopenharmony_ci         else
6727bf215546Sopenharmony_ci            cl.ClipMode = CLIPMODE_NORMAL;
6728bf215546Sopenharmony_ci
6729bf215546Sopenharmony_ci         cl.PerspectiveDivideDisable = ice->state.window_space_position;
6730bf215546Sopenharmony_ci         cl.ViewportXYClipTestEnable = !points_or_lines;
6731bf215546Sopenharmony_ci
6732bf215546Sopenharmony_ci         cl.UserClipDistanceCullTestEnableBitmask =
6733bf215546Sopenharmony_ci            brw_vue_prog_data(ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data)->cull_distance_mask;
6734bf215546Sopenharmony_ci
6735bf215546Sopenharmony_ci         cl.NonPerspectiveBarycentricEnable = wm_prog_data->uses_nonperspective_interp_modes;
6736bf215546Sopenharmony_ci
6737bf215546Sopenharmony_ci         cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1;
6738bf215546Sopenharmony_ci         cl.MaximumVPIndex = ice->state.num_viewports - 1;
6739bf215546Sopenharmony_ci      }
6740bf215546Sopenharmony_ci      crocus_emit_merge(batch, cso_rast->clip, dynamic_clip,
6741bf215546Sopenharmony_ci                      ARRAY_SIZE(cso_rast->clip));
6742bf215546Sopenharmony_ci#endif
6743bf215546Sopenharmony_ci   }
6744bf215546Sopenharmony_ci
6745bf215546Sopenharmony_ci   if (stage_dirty & CROCUS_STAGE_DIRTY_VS) {
6746bf215546Sopenharmony_ci      struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_VERTEX];
6747bf215546Sopenharmony_ci      const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
6748bf215546Sopenharmony_ci      const struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
6749bf215546Sopenharmony_ci#if GFX_VER == 7
6750bf215546Sopenharmony_ci      if (batch->screen->devinfo.platform == INTEL_PLATFORM_IVB)
6751bf215546Sopenharmony_ci         gen7_emit_vs_workaround_flush(batch);
6752bf215546Sopenharmony_ci#endif
6753bf215546Sopenharmony_ci
6754bf215546Sopenharmony_ci
6755bf215546Sopenharmony_ci#if GFX_VER == 6
6756bf215546Sopenharmony_ci      struct push_bos push_bos = {};
6757bf215546Sopenharmony_ci      setup_constant_buffers(ice, batch, MESA_SHADER_VERTEX, &push_bos);
6758bf215546Sopenharmony_ci
6759bf215546Sopenharmony_ci      emit_push_constant_packets(ice, batch, MESA_SHADER_VERTEX, &push_bos);
6760bf215546Sopenharmony_ci#endif
6761bf215546Sopenharmony_ci#if GFX_VER >= 6
6762bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VS), vs)
6763bf215546Sopenharmony_ci#else
6764bf215546Sopenharmony_ci      uint32_t *vs_ptr = stream_state(batch,
6765bf215546Sopenharmony_ci                                      GENX(VS_STATE_length) * 4, 32, &ice->shaders.vs_offset);
6766bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6767bf215546Sopenharmony_ci      _crocus_pack_state(batch, GENX(VS_STATE), vs_ptr, vs)
6768bf215546Sopenharmony_ci#endif
6769bf215546Sopenharmony_ci      {
6770bf215546Sopenharmony_ci         INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX);
6771bf215546Sopenharmony_ci
6772bf215546Sopenharmony_ci         vs.MaximumNumberofThreads = batch->screen->devinfo.max_vs_threads - 1;
6773bf215546Sopenharmony_ci
6774bf215546Sopenharmony_ci#if GFX_VER < 6
6775bf215546Sopenharmony_ci         vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
6776bf215546Sopenharmony_ci         vs.ConstantURBEntryReadLength = vue_prog_data->base.curb_read_length;
6777bf215546Sopenharmony_ci         vs.ConstantURBEntryReadOffset = ice->curbe.vs_start * 2;
6778bf215546Sopenharmony_ci
6779bf215546Sopenharmony_ci         vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries >> (GFX_VER == 5 ? 2 : 0);
6780bf215546Sopenharmony_ci         vs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
6781bf215546Sopenharmony_ci
6782bf215546Sopenharmony_ci         vs.MaximumNumberofThreads =
6783bf215546Sopenharmony_ci            CLAMP(batch->ice->urb.nr_vs_entries / 2, 1, batch->screen->devinfo.max_vs_threads) - 1;
6784bf215546Sopenharmony_ci         vs.StatisticsEnable = false;
6785bf215546Sopenharmony_ci         vs.SamplerStatePointer = ro_bo(batch->state.bo, ice->state.shaders[MESA_SHADER_VERTEX].sampler_offset);
6786bf215546Sopenharmony_ci#endif
6787bf215546Sopenharmony_ci#if GFX_VER == 5
6788bf215546Sopenharmony_ci         /* Force single program flow on Ironlake.  We cannot reliably get
6789bf215546Sopenharmony_ci          * all applications working without it.  See:
6790bf215546Sopenharmony_ci          * https://bugs.freedesktop.org/show_bug.cgi?id=29172
6791bf215546Sopenharmony_ci          *
6792bf215546Sopenharmony_ci          * The most notable and reliably failing application is the Humus
6793bf215546Sopenharmony_ci          * demo "CelShading"
6794bf215546Sopenharmony_ci          */
6795bf215546Sopenharmony_ci         vs.SingleProgramFlow = true;
6796bf215546Sopenharmony_ci         vs.SamplerCount = 0; /* hardware requirement */
6797bf215546Sopenharmony_ci
6798bf215546Sopenharmony_ci#endif
6799bf215546Sopenharmony_ci#if GFX_VER >= 8
6800bf215546Sopenharmony_ci         vs.SIMD8DispatchEnable =
6801bf215546Sopenharmony_ci            vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
6802bf215546Sopenharmony_ci
6803bf215546Sopenharmony_ci         vs.UserClipDistanceCullTestEnableBitmask =
6804bf215546Sopenharmony_ci            vue_prog_data->cull_distance_mask;
6805bf215546Sopenharmony_ci#endif
6806bf215546Sopenharmony_ci      }
6807bf215546Sopenharmony_ci
6808bf215546Sopenharmony_ci#if GFX_VER == 6
6809bf215546Sopenharmony_ci      crocus_emit_pipe_control_flush(batch,
6810bf215546Sopenharmony_ci                                     "post VS const",
6811bf215546Sopenharmony_ci                                     PIPE_CONTROL_DEPTH_STALL |
6812bf215546Sopenharmony_ci                                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
6813bf215546Sopenharmony_ci                                     PIPE_CONTROL_STATE_CACHE_INVALIDATE);
6814bf215546Sopenharmony_ci#endif
6815bf215546Sopenharmony_ci   }
6816bf215546Sopenharmony_ci
6817bf215546Sopenharmony_ci   if (stage_dirty & CROCUS_STAGE_DIRTY_GS) {
6818bf215546Sopenharmony_ci      struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_GEOMETRY];
6819bf215546Sopenharmony_ci      bool active = GFX_VER >= 6 && shader;
6820bf215546Sopenharmony_ci#if GFX_VER == 6
6821bf215546Sopenharmony_ci      struct push_bos push_bos = {};
6822bf215546Sopenharmony_ci      if (shader)
6823bf215546Sopenharmony_ci         setup_constant_buffers(ice, batch, MESA_SHADER_GEOMETRY, &push_bos);
6824bf215546Sopenharmony_ci
6825bf215546Sopenharmony_ci      emit_push_constant_packets(ice, batch, MESA_SHADER_GEOMETRY, &push_bos);
6826bf215546Sopenharmony_ci#endif
6827bf215546Sopenharmony_ci#if GFX_VERx10 == 70
6828bf215546Sopenharmony_ci   /**
6829bf215546Sopenharmony_ci    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
6830bf215546Sopenharmony_ci    * Geometry > Geometry Shader > State:
6831bf215546Sopenharmony_ci    *
6832bf215546Sopenharmony_ci    *     "Note: Because of corruption in IVB:GT2, software needs to flush the
6833bf215546Sopenharmony_ci    *     whole fixed function pipeline when the GS enable changes value in
6834bf215546Sopenharmony_ci    *     the 3DSTATE_GS."
6835bf215546Sopenharmony_ci    *
6836bf215546Sopenharmony_ci    * The hardware architects have clarified that in this context "flush the
6837bf215546Sopenharmony_ci    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
6838bf215546Sopenharmony_ci    * Stall" bit set.
6839bf215546Sopenharmony_ci    */
6840bf215546Sopenharmony_ci   if (batch->screen->devinfo.gt == 2 && ice->state.gs_enabled != active)
6841bf215546Sopenharmony_ci      gen7_emit_cs_stall_flush(batch);
6842bf215546Sopenharmony_ci#endif
6843bf215546Sopenharmony_ci#if GFX_VER >= 6
6844bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_GS), gs)
6845bf215546Sopenharmony_ci#else
6846bf215546Sopenharmony_ci      uint32_t *gs_ptr = stream_state(batch,
6847bf215546Sopenharmony_ci                                      GENX(GS_STATE_length) * 4, 32, &ice->shaders.gs_offset);
6848bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
6849bf215546Sopenharmony_ci      _crocus_pack_state(batch, GENX(GS_STATE), gs_ptr, gs)
6850bf215546Sopenharmony_ci#endif
6851bf215546Sopenharmony_ci     {
6852bf215546Sopenharmony_ci#if GFX_VER >= 6
6853bf215546Sopenharmony_ci         if (active) {
6854bf215546Sopenharmony_ci            const struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(shader->prog_data);
6855bf215546Sopenharmony_ci            const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
6856bf215546Sopenharmony_ci            const struct brw_stage_prog_data *prog_data = &gs_prog_data->base.base;
6857bf215546Sopenharmony_ci
6858bf215546Sopenharmony_ci            INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY);
6859bf215546Sopenharmony_ci#if GFX_VER >= 7
6860bf215546Sopenharmony_ci            gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
6861bf215546Sopenharmony_ci            gs.OutputTopology = gs_prog_data->output_topology;
6862bf215546Sopenharmony_ci            gs.ControlDataHeaderSize =
6863bf215546Sopenharmony_ci               gs_prog_data->control_data_header_size_hwords;
6864bf215546Sopenharmony_ci
6865bf215546Sopenharmony_ci            gs.InstanceControl = gs_prog_data->invocations - 1;
6866bf215546Sopenharmony_ci            gs.DispatchMode = vue_prog_data->dispatch_mode;
6867bf215546Sopenharmony_ci
6868bf215546Sopenharmony_ci            gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
6869bf215546Sopenharmony_ci
6870bf215546Sopenharmony_ci            gs.ControlDataFormat = gs_prog_data->control_data_format;
6871bf215546Sopenharmony_ci#endif
6872bf215546Sopenharmony_ci
6873bf215546Sopenharmony_ci            /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between
6874bf215546Sopenharmony_ci             * Ivy Bridge and Haswell.
6875bf215546Sopenharmony_ci             *
6876bf215546Sopenharmony_ci             * On Ivy Bridge, setting this bit causes the vertices of a triangle
6877bf215546Sopenharmony_ci             * strip to be delivered to the geometry shader in an order that does
6878bf215546Sopenharmony_ci             * not strictly follow the OpenGL spec, but preserves triangle
6879bf215546Sopenharmony_ci             * orientation.  For example, if the vertices are (1, 2, 3, 4, 5), then
6880bf215546Sopenharmony_ci             * the geometry shader sees triangles:
6881bf215546Sopenharmony_ci             *
6882bf215546Sopenharmony_ci             * (1, 2, 3), (2, 4, 3), (3, 4, 5)
6883bf215546Sopenharmony_ci             *
6884bf215546Sopenharmony_ci             * (Clearing the bit is even worse, because it fails to preserve
6885bf215546Sopenharmony_ci             * orientation).
6886bf215546Sopenharmony_ci             *
6887bf215546Sopenharmony_ci             * Triangle strips with adjacency are always ordered in a way that preserves
6888bf215546Sopenharmony_ci             * triangle orientation but does not strictly follow the OpenGL spec,
6889bf215546Sopenharmony_ci             * regardless of the setting of this bit.
6890bf215546Sopenharmony_ci             *
6891bf215546Sopenharmony_ci             * On Haswell, both triangle strips and triangle strips with adjacency
6892bf215546Sopenharmony_ci             * are always ordered in a way that preserves triangle orientation.
6893bf215546Sopenharmony_ci             * Setting this bit causes the ordering to strictly follow the OpenGL
6894bf215546Sopenharmony_ci             * spec.
6895bf215546Sopenharmony_ci             *
6896bf215546Sopenharmony_ci             * So in either case we want to set the bit.  Unfortunately on Ivy
6897bf215546Sopenharmony_ci             * Bridge this will get the order close to correct but not perfect.
6898bf215546Sopenharmony_ci             */
6899bf215546Sopenharmony_ci            gs.ReorderMode = TRAILING;
6900bf215546Sopenharmony_ci            gs.MaximumNumberofThreads =
6901bf215546Sopenharmony_ci               GFX_VER == 8 ? (batch->screen->devinfo.max_gs_threads / 2 - 1) :
6902bf215546Sopenharmony_ci               (batch->screen->devinfo.max_gs_threads - 1);
6903bf215546Sopenharmony_ci#if GFX_VER < 7
6904bf215546Sopenharmony_ci            gs.SOStatisticsEnable = true;
6905bf215546Sopenharmony_ci            if (gs_prog_data->num_transform_feedback_bindings)
6906bf215546Sopenharmony_ci               gs.SVBIPayloadEnable = ice->state.streamout_active;
6907bf215546Sopenharmony_ci
6908bf215546Sopenharmony_ci            /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
6909bf215546Sopenharmony_ci             * was previously done for gen6.
6910bf215546Sopenharmony_ci             *
6911bf215546Sopenharmony_ci             * TODO: test with both disabled to see if the HW is behaving
6912bf215546Sopenharmony_ci             * as expected, like in gen7.
6913bf215546Sopenharmony_ci             */
6914bf215546Sopenharmony_ci            gs.SingleProgramFlow = true;
6915bf215546Sopenharmony_ci            gs.VectorMaskEnable = true;
6916bf215546Sopenharmony_ci#endif
6917bf215546Sopenharmony_ci#if GFX_VER >= 8
6918bf215546Sopenharmony_ci            gs.ExpectedVertexCount = gs_prog_data->vertices_in;
6919bf215546Sopenharmony_ci
6920bf215546Sopenharmony_ci            if (gs_prog_data->static_vertex_count != -1) {
6921bf215546Sopenharmony_ci               gs.StaticOutput = true;
6922bf215546Sopenharmony_ci               gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
6923bf215546Sopenharmony_ci            }
6924bf215546Sopenharmony_ci            gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
6925bf215546Sopenharmony_ci
6926bf215546Sopenharmony_ci            gs.UserClipDistanceCullTestEnableBitmask =
6927bf215546Sopenharmony_ci               vue_prog_data->cull_distance_mask;
6928bf215546Sopenharmony_ci
6929bf215546Sopenharmony_ci            const int urb_entry_write_offset = 1;
6930bf215546Sopenharmony_ci            const uint32_t urb_entry_output_length =
6931bf215546Sopenharmony_ci               DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
6932bf215546Sopenharmony_ci               urb_entry_write_offset;
6933bf215546Sopenharmony_ci
6934bf215546Sopenharmony_ci            gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
6935bf215546Sopenharmony_ci            gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
6936bf215546Sopenharmony_ci#endif
6937bf215546Sopenharmony_ci         }
6938bf215546Sopenharmony_ci#endif
6939bf215546Sopenharmony_ci#if GFX_VER <= 6
6940bf215546Sopenharmony_ci         if (!active && ice->shaders.ff_gs_prog) {
6941bf215546Sopenharmony_ci            const struct brw_ff_gs_prog_data *gs_prog_data = (struct brw_ff_gs_prog_data *)ice->shaders.ff_gs_prog->prog_data;
6942bf215546Sopenharmony_ci            /* In gen6, transform feedback for the VS stage is done with an
6943bf215546Sopenharmony_ci             * ad-hoc GS program. This function provides the needed 3DSTATE_GS
6944bf215546Sopenharmony_ci             * for this.
6945bf215546Sopenharmony_ci             */
6946bf215546Sopenharmony_ci            gs.KernelStartPointer = KSP(ice, ice->shaders.ff_gs_prog);
6947bf215546Sopenharmony_ci            gs.SingleProgramFlow = true;
6948bf215546Sopenharmony_ci            gs.DispatchGRFStartRegisterForURBData = GFX_VER == 6 ? 2 : 1;
6949bf215546Sopenharmony_ci            gs.VertexURBEntryReadLength = gs_prog_data->urb_read_length;
6950bf215546Sopenharmony_ci
6951bf215546Sopenharmony_ci#if GFX_VER <= 5
6952bf215546Sopenharmony_ci            gs.GRFRegisterCount =
6953bf215546Sopenharmony_ci               DIV_ROUND_UP(gs_prog_data->total_grf, 16) - 1;
6954bf215546Sopenharmony_ci            /* BRW_NEW_URB_FENCE */
6955bf215546Sopenharmony_ci            gs.NumberofURBEntries = batch->ice->urb.nr_gs_entries;
6956bf215546Sopenharmony_ci            gs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
6957bf215546Sopenharmony_ci            gs.MaximumNumberofThreads = batch->ice->urb.nr_gs_entries >= 8 ? 1 : 0;
6958bf215546Sopenharmony_ci            gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
6959bf215546Sopenharmony_ci#else
6960bf215546Sopenharmony_ci            gs.Enable = true;
6961bf215546Sopenharmony_ci            gs.VectorMaskEnable = true;
6962bf215546Sopenharmony_ci            gs.SVBIPayloadEnable = true;
6963bf215546Sopenharmony_ci            gs.SVBIPostIncrementEnable = true;
6964bf215546Sopenharmony_ci            gs.SVBIPostIncrementValue = gs_prog_data->svbi_postincrement_value;
6965bf215546Sopenharmony_ci            gs.SOStatisticsEnable = true;
6966bf215546Sopenharmony_ci            gs.MaximumNumberofThreads = batch->screen->devinfo.max_gs_threads - 1;
6967bf215546Sopenharmony_ci#endif
6968bf215546Sopenharmony_ci         }
6969bf215546Sopenharmony_ci#endif
6970bf215546Sopenharmony_ci         if (!active && !ice->shaders.ff_gs_prog) {
6971bf215546Sopenharmony_ci#if GFX_VER < 8
6972bf215546Sopenharmony_ci            gs.DispatchGRFStartRegisterForURBData = 1;
6973bf215546Sopenharmony_ci#if GFX_VER >= 7
6974bf215546Sopenharmony_ci            gs.IncludeVertexHandles = true;
6975bf215546Sopenharmony_ci#endif
6976bf215546Sopenharmony_ci#endif
6977bf215546Sopenharmony_ci         }
6978bf215546Sopenharmony_ci#if GFX_VER >= 6
6979bf215546Sopenharmony_ci         gs.StatisticsEnable = true;
6980bf215546Sopenharmony_ci#endif
6981bf215546Sopenharmony_ci#if GFX_VER == 5 || GFX_VER == 6
6982bf215546Sopenharmony_ci         gs.RenderingEnabled = true;
6983bf215546Sopenharmony_ci#endif
6984bf215546Sopenharmony_ci#if GFX_VER <= 5
6985bf215546Sopenharmony_ci         gs.MaximumVPIndex = ice->state.num_viewports - 1;
6986bf215546Sopenharmony_ci#endif
6987bf215546Sopenharmony_ci      }
6988bf215546Sopenharmony_ci      ice->state.gs_enabled = active;
6989bf215546Sopenharmony_ci   }
6990bf215546Sopenharmony_ci
6991bf215546Sopenharmony_ci#if GFX_VER >= 7
6992bf215546Sopenharmony_ci   if (stage_dirty & CROCUS_STAGE_DIRTY_TCS) {
6993bf215546Sopenharmony_ci      struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_CTRL];
6994bf215546Sopenharmony_ci
6995bf215546Sopenharmony_ci      if (shader) {
6996bf215546Sopenharmony_ci         const struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(shader->prog_data);
6997bf215546Sopenharmony_ci         const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
6998bf215546Sopenharmony_ci         const struct brw_stage_prog_data *prog_data = &tcs_prog_data->base.base;
6999bf215546Sopenharmony_ci
7000bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_HS), hs) {
7001bf215546Sopenharmony_ci            INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL);
7002bf215546Sopenharmony_ci            hs.InstanceCount = tcs_prog_data->instances - 1;
7003bf215546Sopenharmony_ci            hs.IncludeVertexHandles = true;
7004bf215546Sopenharmony_ci            hs.MaximumNumberofThreads = batch->screen->devinfo.max_tcs_threads - 1;
7005bf215546Sopenharmony_ci         }
7006bf215546Sopenharmony_ci      } else {
7007bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_HS), hs);
7008bf215546Sopenharmony_ci      }
7009bf215546Sopenharmony_ci
7010bf215546Sopenharmony_ci   }
7011bf215546Sopenharmony_ci
7012bf215546Sopenharmony_ci   if (stage_dirty & CROCUS_STAGE_DIRTY_TES) {
7013bf215546Sopenharmony_ci      struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_EVAL];
7014bf215546Sopenharmony_ci      if (shader) {
7015bf215546Sopenharmony_ci         const struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(shader->prog_data);
7016bf215546Sopenharmony_ci         const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);
7017bf215546Sopenharmony_ci         const struct brw_stage_prog_data *prog_data = &tes_prog_data->base.base;
7018bf215546Sopenharmony_ci
7019bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_TE), te) {
7020bf215546Sopenharmony_ci            te.Partitioning = tes_prog_data->partitioning;
7021bf215546Sopenharmony_ci            te.OutputTopology = tes_prog_data->output_topology;
7022bf215546Sopenharmony_ci            te.TEDomain = tes_prog_data->domain;
7023bf215546Sopenharmony_ci            te.TEEnable = true;
7024bf215546Sopenharmony_ci            te.MaximumTessellationFactorOdd = 63.0;
7025bf215546Sopenharmony_ci            te.MaximumTessellationFactorNotOdd = 64.0;
7026bf215546Sopenharmony_ci         };
7027bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds) {
7028bf215546Sopenharmony_ci            INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL);
7029bf215546Sopenharmony_ci
7030bf215546Sopenharmony_ci            ds.MaximumNumberofThreads = batch->screen->devinfo.max_tes_threads - 1;
7031bf215546Sopenharmony_ci            ds.ComputeWCoordinateEnable =
7032bf215546Sopenharmony_ci               tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
7033bf215546Sopenharmony_ci
7034bf215546Sopenharmony_ci#if GFX_VER >= 8
7035bf215546Sopenharmony_ci            if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
7036bf215546Sopenharmony_ci               ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
7037bf215546Sopenharmony_ci            ds.UserClipDistanceCullTestEnableBitmask =
7038bf215546Sopenharmony_ci               vue_prog_data->cull_distance_mask;
7039bf215546Sopenharmony_ci#endif
7040bf215546Sopenharmony_ci         };
7041bf215546Sopenharmony_ci      } else {
7042bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_TE), te);
7043bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds);
7044bf215546Sopenharmony_ci      }
7045bf215546Sopenharmony_ci   }
7046bf215546Sopenharmony_ci#endif
7047bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_RASTER) {
7048bf215546Sopenharmony_ci
7049bf215546Sopenharmony_ci#if GFX_VER < 6
7050bf215546Sopenharmony_ci      const struct brw_sf_prog_data *sf_prog_data = (struct brw_sf_prog_data *)ice->shaders.sf_prog->prog_data;
7051bf215546Sopenharmony_ci      struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;
7052bf215546Sopenharmony_ci      uint32_t *sf_ptr = stream_state(batch,
7053bf215546Sopenharmony_ci                                      GENX(SF_STATE_length) * 4, 32, &ice->shaders.sf_offset);
7054bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
7055bf215546Sopenharmony_ci      _crocus_pack_state(batch, GENX(SF_STATE), sf_ptr, sf) {
7056bf215546Sopenharmony_ci         sf.KernelStartPointer = KSP(ice, ice->shaders.sf_prog);
7057bf215546Sopenharmony_ci         sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
7058bf215546Sopenharmony_ci         sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
7059bf215546Sopenharmony_ci         sf.DispatchGRFStartRegisterForURBData = 3;
7060bf215546Sopenharmony_ci         sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
7061bf215546Sopenharmony_ci         sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;
7062bf215546Sopenharmony_ci         sf.URBEntryAllocationSize = batch->ice->urb.sfsize - 1;
7063bf215546Sopenharmony_ci         sf.NumberofURBEntries = batch->ice->urb.nr_sf_entries;
7064bf215546Sopenharmony_ci         sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
7065bf215546Sopenharmony_ci
7066bf215546Sopenharmony_ci         sf.SetupViewportStateOffset = ro_bo(batch->state.bo, ice->state.sf_vp_address);
7067bf215546Sopenharmony_ci
7068bf215546Sopenharmony_ci         sf.MaximumNumberofThreads =
7069bf215546Sopenharmony_ci            MIN2(GFX_VER == 5 ? 48 : 24, batch->ice->urb.nr_sf_entries) - 1;
7070bf215546Sopenharmony_ci
7071bf215546Sopenharmony_ci         sf.SpritePointEnable = cso_state->point_quad_rasterization;
7072bf215546Sopenharmony_ci         sf.DestinationOriginHorizontalBias = 0.5;
7073bf215546Sopenharmony_ci         sf.DestinationOriginVerticalBias = 0.5;
7074bf215546Sopenharmony_ci
7075bf215546Sopenharmony_ci	 sf.LineEndCapAntialiasingRegionWidth =
7076bf215546Sopenharmony_ci            cso_state->line_smooth ? _10pixels : _05pixels;
7077bf215546Sopenharmony_ci         sf.LastPixelEnable = cso_state->line_last_pixel;
7078bf215546Sopenharmony_ci         sf.AntialiasingEnable = cso_state->line_smooth;
7079bf215546Sopenharmony_ci
7080bf215546Sopenharmony_ci         sf.LineWidth = get_line_width(cso_state);
7081bf215546Sopenharmony_ci         sf.PointWidth = cso_state->point_size;
7082bf215546Sopenharmony_ci         sf.PointWidthSource = cso_state->point_size_per_vertex ? Vertex : State;
7083bf215546Sopenharmony_ci#if GFX_VERx10 >= 45
7084bf215546Sopenharmony_ci         sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
7085bf215546Sopenharmony_ci#endif
7086bf215546Sopenharmony_ci         sf.ViewportTransformEnable = true;
7087bf215546Sopenharmony_ci         sf.FrontWinding = cso_state->front_ccw ? 1 : 0;
7088bf215546Sopenharmony_ci         sf.ScissorRectangleEnable = true;
7089bf215546Sopenharmony_ci         sf.CullMode = translate_cull_mode(cso_state->cull_face);
7090bf215546Sopenharmony_ci
7091bf215546Sopenharmony_ci         if (cso_state->flatshade_first) {
7092bf215546Sopenharmony_ci            sf.TriangleFanProvokingVertexSelect = 1;
7093bf215546Sopenharmony_ci         } else {
7094bf215546Sopenharmony_ci            sf.TriangleStripListProvokingVertexSelect = 2;
7095bf215546Sopenharmony_ci            sf.TriangleFanProvokingVertexSelect = 2;
7096bf215546Sopenharmony_ci            sf.LineStripListProvokingVertexSelect = 1;
7097bf215546Sopenharmony_ci         }
7098bf215546Sopenharmony_ci      }
7099bf215546Sopenharmony_ci#else
7100bf215546Sopenharmony_ci      struct crocus_rasterizer_state *cso = ice->state.cso_rast;
7101bf215546Sopenharmony_ci      uint32_t dynamic_sf[GENX(3DSTATE_SF_length)];
7102bf215546Sopenharmony_ci      crocus_pack_command(GENX(3DSTATE_SF), &dynamic_sf, sf) {
7103bf215546Sopenharmony_ci         sf.ViewportTransformEnable = !ice->state.window_space_position;
7104bf215546Sopenharmony_ci
7105bf215546Sopenharmony_ci#if GFX_VER == 6
7106bf215546Sopenharmony_ci         const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);
7107bf215546Sopenharmony_ci         uint32_t urb_entry_read_length;
7108bf215546Sopenharmony_ci         uint32_t urb_entry_read_offset;
7109bf215546Sopenharmony_ci         uint32_t point_sprite_enables;
7110bf215546Sopenharmony_ci         calculate_attr_overrides(ice, sf.Attribute, &point_sprite_enables,
7111bf215546Sopenharmony_ci                                  &urb_entry_read_length,
7112bf215546Sopenharmony_ci                                  &urb_entry_read_offset);
7113bf215546Sopenharmony_ci         sf.VertexURBEntryReadLength = urb_entry_read_length;
7114bf215546Sopenharmony_ci         sf.VertexURBEntryReadOffset = urb_entry_read_offset;
7115bf215546Sopenharmony_ci         sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
7116bf215546Sopenharmony_ci         sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
7117bf215546Sopenharmony_ci         sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
7118bf215546Sopenharmony_ci#endif
7119bf215546Sopenharmony_ci
7120bf215546Sopenharmony_ci#if GFX_VER >= 6 && GFX_VER < 8
7121bf215546Sopenharmony_ci         if (ice->state.framebuffer.samples > 1 && ice->state.cso_rast->cso.multisample)
7122bf215546Sopenharmony_ci            sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
7123bf215546Sopenharmony_ci#endif
7124bf215546Sopenharmony_ci#if GFX_VER == 7
7125bf215546Sopenharmony_ci         if (ice->state.framebuffer.zsbuf) {
7126bf215546Sopenharmony_ci            struct crocus_resource *zres, *sres;
7127bf215546Sopenharmony_ci               crocus_get_depth_stencil_resources(&batch->screen->devinfo,
7128bf215546Sopenharmony_ci                                                  ice->state.framebuffer.zsbuf->texture,
7129bf215546Sopenharmony_ci                                                  &zres, &sres);
7130bf215546Sopenharmony_ci            /* ANV thinks that the stencil-ness doesn't matter, this is just
7131bf215546Sopenharmony_ci             * about handling polygon offset scaling.
7132bf215546Sopenharmony_ci             */
7133bf215546Sopenharmony_ci            sf.DepthBufferSurfaceFormat = zres ? isl_format_get_depth_format(zres->surf.format, false) : D16_UNORM;
7134bf215546Sopenharmony_ci         }
7135bf215546Sopenharmony_ci#endif
7136bf215546Sopenharmony_ci      }
7137bf215546Sopenharmony_ci      crocus_emit_merge(batch, cso->sf, dynamic_sf,
7138bf215546Sopenharmony_ci                      ARRAY_SIZE(dynamic_sf));
7139bf215546Sopenharmony_ci#if GFX_VER == 8
7140bf215546Sopenharmony_ci      crocus_batch_emit(batch, cso->raster, sizeof(cso->raster));
7141bf215546Sopenharmony_ci#endif
7142bf215546Sopenharmony_ci#endif
7143bf215546Sopenharmony_ci   }
7144bf215546Sopenharmony_ci
7145bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_WM) {
7146bf215546Sopenharmony_ci      struct crocus_rasterizer_state *cso = ice->state.cso_rast;
7147bf215546Sopenharmony_ci      const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);
7148bf215546Sopenharmony_ci      UNUSED bool writes_depth = wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
7149bf215546Sopenharmony_ci      UNUSED const struct shader_info *fs_info =
7150bf215546Sopenharmony_ci         crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
7151bf215546Sopenharmony_ci
7152bf215546Sopenharmony_ci#if GFX_VER == 6
7153bf215546Sopenharmony_ci      struct push_bos push_bos = {};
7154bf215546Sopenharmony_ci      setup_constant_buffers(ice, batch, MESA_SHADER_FRAGMENT, &push_bos);
7155bf215546Sopenharmony_ci
7156bf215546Sopenharmony_ci      emit_push_constant_packets(ice, batch, MESA_SHADER_FRAGMENT, &push_bos);
7157bf215546Sopenharmony_ci#endif
7158bf215546Sopenharmony_ci#if GFX_VER >= 6
7159bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_WM), wm)
7160bf215546Sopenharmony_ci#else
7161bf215546Sopenharmony_ci      uint32_t *wm_ptr = stream_state(batch,
7162bf215546Sopenharmony_ci                                      GENX(WM_STATE_length) * 4, 32, &ice->shaders.wm_offset);
7163bf215546Sopenharmony_ci
7164bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;
7165bf215546Sopenharmony_ci
7166bf215546Sopenharmony_ci      _crocus_pack_state(batch, GENX(WM_STATE), wm_ptr, wm)
7167bf215546Sopenharmony_ci#endif
7168bf215546Sopenharmony_ci     {
7169bf215546Sopenharmony_ci#if GFX_VER <= 6
7170bf215546Sopenharmony_ci         wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
7171bf215546Sopenharmony_ci         wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
7172bf215546Sopenharmony_ci         wm._32PixelDispatchEnable = wm_prog_data->dispatch_32;
7173bf215546Sopenharmony_ci#endif
7174bf215546Sopenharmony_ci#if GFX_VER == 4
7175bf215546Sopenharmony_ci      /* On gen4, we only have one shader kernel */
7176bf215546Sopenharmony_ci         if (brw_wm_state_has_ksp(wm, 0)) {
7177bf215546Sopenharmony_ci            wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]);
7178bf215546Sopenharmony_ci            wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
7179bf215546Sopenharmony_ci            wm.DispatchGRFStartRegisterForConstantSetupData0 =
7180bf215546Sopenharmony_ci               wm_prog_data->base.dispatch_grf_start_reg;
7181bf215546Sopenharmony_ci         }
7182bf215546Sopenharmony_ci#elif GFX_VER == 5
7183bf215546Sopenharmony_ci         wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7184bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
7185bf215546Sopenharmony_ci         wm.KernelStartPointer1 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7186bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
7187bf215546Sopenharmony_ci         wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7188bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
7189bf215546Sopenharmony_ci
7190bf215546Sopenharmony_ci         wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
7191bf215546Sopenharmony_ci         wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);
7192bf215546Sopenharmony_ci         wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);
7193bf215546Sopenharmony_ci
7194bf215546Sopenharmony_ci         wm.DispatchGRFStartRegisterForConstantSetupData0 =
7195bf215546Sopenharmony_ci            wm_prog_data->base.dispatch_grf_start_reg;
7196bf215546Sopenharmony_ci#elif GFX_VER == 6
7197bf215546Sopenharmony_ci         wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7198bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
7199bf215546Sopenharmony_ci         wm.KernelStartPointer1 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7200bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
7201bf215546Sopenharmony_ci         wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +
7202bf215546Sopenharmony_ci            brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
7203bf215546Sopenharmony_ci
7204bf215546Sopenharmony_ci         wm.DispatchGRFStartRegisterForConstantSetupData0 =
7205bf215546Sopenharmony_ci           brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
7206bf215546Sopenharmony_ci         wm.DispatchGRFStartRegisterForConstantSetupData1 =
7207bf215546Sopenharmony_ci           brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
7208bf215546Sopenharmony_ci         wm.DispatchGRFStartRegisterForConstantSetupData2 =
7209bf215546Sopenharmony_ci           brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
7210bf215546Sopenharmony_ci#endif
7211bf215546Sopenharmony_ci#if GFX_VER <= 5
7212bf215546Sopenharmony_ci         wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
7213bf215546Sopenharmony_ci         wm.ConstantURBEntryReadOffset = ice->curbe.wm_start * 2;
7214bf215546Sopenharmony_ci         wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
7215bf215546Sopenharmony_ci         wm.SetupURBEntryReadOffset = 0;
7216bf215546Sopenharmony_ci         wm.EarlyDepthTestEnable = true;
7217bf215546Sopenharmony_ci         wm.LineAntialiasingRegionWidth = _05pixels;
7218bf215546Sopenharmony_ci         wm.LineEndCapAntialiasingRegionWidth = _10pixels;
7219bf215546Sopenharmony_ci         wm.DepthCoefficientURBReadOffset = 1;
7220bf215546Sopenharmony_ci
7221bf215546Sopenharmony_ci         if (cso->cso.offset_tri) {
7222bf215546Sopenharmony_ci            wm.GlobalDepthOffsetEnable = true;
7223bf215546Sopenharmony_ci
7224bf215546Sopenharmony_ci         /* Something weird going on with legacy_global_depth_bias,
7225bf215546Sopenharmony_ci          * offset_constant, scaling and MRD.  This value passes glean
7226bf215546Sopenharmony_ci          * but gives some odd results elsewhere (e.g. the
7227bf215546Sopenharmony_ci          * quad-offset-units test).
7228bf215546Sopenharmony_ci          */
7229bf215546Sopenharmony_ci            wm.GlobalDepthOffsetConstant = cso->cso.offset_units * 2;
7230bf215546Sopenharmony_ci            wm.GlobalDepthOffsetScale = cso->cso.offset_scale;
7231bf215546Sopenharmony_ci         }
7232bf215546Sopenharmony_ci         wm.SamplerStatePointer = ro_bo(batch->state.bo,
7233bf215546Sopenharmony_ci                                        ice->state.shaders[MESA_SHADER_FRAGMENT].sampler_offset);
7234bf215546Sopenharmony_ci#endif
7235bf215546Sopenharmony_ci
7236bf215546Sopenharmony_ci         wm.StatisticsEnable = (GFX_VER >= 6 || ice->state.stats_wm) ?
7237bf215546Sopenharmony_ci            ice->state.statistics_counters_enabled : 0;
7238bf215546Sopenharmony_ci
7239bf215546Sopenharmony_ci#if GFX_VER >= 6
7240bf215546Sopenharmony_ci         wm.LineAntialiasingRegionWidth = _10pixels;
7241bf215546Sopenharmony_ci         wm.LineEndCapAntialiasingRegionWidth = _05pixels;
7242bf215546Sopenharmony_ci
7243bf215546Sopenharmony_ci         wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
7244bf215546Sopenharmony_ci         wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
7245bf215546Sopenharmony_ci#endif
7246bf215546Sopenharmony_ci#if GFX_VER == 6
7247bf215546Sopenharmony_ci      wm.DualSourceBlendEnable = wm_prog_data->dual_src_blend &&
7248bf215546Sopenharmony_ci         ice->state.cso_blend->dual_color_blending;
7249bf215546Sopenharmony_ci      wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
7250bf215546Sopenharmony_ci      wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
7251bf215546Sopenharmony_ci
7252bf215546Sopenharmony_ci      /* From the SNB PRM, volume 2 part 1, page 281:
7253bf215546Sopenharmony_ci       * "If the PS kernel does not need the Position XY Offsets
7254bf215546Sopenharmony_ci       * to compute a Position XY value, then this field should be
7255bf215546Sopenharmony_ci       * programmed to POSOFFSET_NONE."
7256bf215546Sopenharmony_ci       *
7257bf215546Sopenharmony_ci       * "SW Recommendation: If the PS kernel needs the Position Offsets
7258bf215546Sopenharmony_ci       * to compute a Position XY value, this field should match Position
7259bf215546Sopenharmony_ci       * ZW Interpolation Mode to ensure a consistent position.xyzw
7260bf215546Sopenharmony_ci       * computation."
7261bf215546Sopenharmony_ci       * We only require XY sample offsets. So, this recommendation doesn't
7262bf215546Sopenharmony_ci       * look useful at the moment. We might need this in future.
7263bf215546Sopenharmony_ci       */
7264bf215546Sopenharmony_ci      if (wm_prog_data->uses_pos_offset)
7265bf215546Sopenharmony_ci         wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
7266bf215546Sopenharmony_ci      else
7267bf215546Sopenharmony_ci         wm.PositionXYOffsetSelect = POSOFFSET_NONE;
7268bf215546Sopenharmony_ci#endif
7269bf215546Sopenharmony_ci         wm.LineStippleEnable = cso->cso.line_stipple_enable;
7270bf215546Sopenharmony_ci         wm.PolygonStippleEnable = cso->cso.poly_stipple_enable;
7271bf215546Sopenharmony_ci
7272bf215546Sopenharmony_ci#if GFX_VER < 7
7273bf215546Sopenharmony_ci         if (wm_prog_data->base.use_alt_mode)
7274bf215546Sopenharmony_ci            wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
7275bf215546Sopenharmony_ci         wm.BindingTableEntryCount = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bt.size_bytes / 4;
7276bf215546Sopenharmony_ci         wm.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1;
7277bf215546Sopenharmony_ci#endif
7278bf215546Sopenharmony_ci
7279bf215546Sopenharmony_ci#if GFX_VER < 8
7280bf215546Sopenharmony_ci#if GFX_VER >= 6
7281bf215546Sopenharmony_ci         wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
7282bf215546Sopenharmony_ci
7283bf215546Sopenharmony_ci         struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
7284bf215546Sopenharmony_ci         if (fb->samples > 1) {
7285bf215546Sopenharmony_ci            if (cso->cso.multisample)
7286bf215546Sopenharmony_ci               wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
7287bf215546Sopenharmony_ci            else
7288bf215546Sopenharmony_ci               wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
7289bf215546Sopenharmony_ci
7290bf215546Sopenharmony_ci            if (wm_prog_data->persample_dispatch)
7291bf215546Sopenharmony_ci               wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
7292bf215546Sopenharmony_ci            else
7293bf215546Sopenharmony_ci               wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
7294bf215546Sopenharmony_ci         } else {
7295bf215546Sopenharmony_ci            wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
7296bf215546Sopenharmony_ci            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
7297bf215546Sopenharmony_ci         }
7298bf215546Sopenharmony_ci#endif
7299bf215546Sopenharmony_ci
7300bf215546Sopenharmony_ci         wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
7301bf215546Sopenharmony_ci
7302bf215546Sopenharmony_ci         if (wm_prog_data->uses_kill ||
7303bf215546Sopenharmony_ci             ice->state.cso_zsa->cso.alpha_enabled ||
7304bf215546Sopenharmony_ci             ice->state.cso_blend->cso.alpha_to_coverage ||
7305bf215546Sopenharmony_ci             (GFX_VER >= 6 && wm_prog_data->uses_omask))
7306bf215546Sopenharmony_ci            wm.PixelShaderKillsPixel = true;
7307bf215546Sopenharmony_ci
7308bf215546Sopenharmony_ci         if (has_writeable_rt(ice->state.cso_blend, fs_info) ||
7309bf215546Sopenharmony_ci             writes_depth || wm.PixelShaderKillsPixel ||
7310bf215546Sopenharmony_ci             (GFX_VER >= 6 && wm_prog_data->has_side_effects))
7311bf215546Sopenharmony_ci            wm.ThreadDispatchEnable = true;
7312bf215546Sopenharmony_ci
7313bf215546Sopenharmony_ci#if GFX_VER >= 7
7314bf215546Sopenharmony_ci         wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
7315bf215546Sopenharmony_ci         wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
7316bf215546Sopenharmony_ci#else
7317bf215546Sopenharmony_ci         if (wm_prog_data->base.total_scratch) {
7318bf215546Sopenharmony_ci            struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch,
7319bf215546Sopenharmony_ci                                                            MESA_SHADER_FRAGMENT);
7320bf215546Sopenharmony_ci            wm.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11;
7321bf215546Sopenharmony_ci            wm.ScratchSpaceBasePointer = rw_bo(bo, 0);
7322bf215546Sopenharmony_ci         }
7323bf215546Sopenharmony_ci
7324bf215546Sopenharmony_ci         wm.PixelShaderComputedDepth = writes_depth;
7325bf215546Sopenharmony_ci
7326bf215546Sopenharmony_ci#endif
7327bf215546Sopenharmony_ci         /* The "UAV access enable" bits are unnecessary on HSW because they only
7328bf215546Sopenharmony_ci          * seem to have an effect on the HW-assisted coherency mechanism which we
7329bf215546Sopenharmony_ci          * don't need, and the rasterization-related UAV_ONLY flag and the
7330bf215546Sopenharmony_ci          * DISPATCH_ENABLE bit can be set independently from it.
7331bf215546Sopenharmony_ci          * C.f. gen8_upload_ps_extra().
7332bf215546Sopenharmony_ci          *
7333bf215546Sopenharmony_ci          * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |
7334bf215546Sopenharmony_ci          * _NEW_COLOR
7335bf215546Sopenharmony_ci          */
7336bf215546Sopenharmony_ci#if GFX_VERx10 == 75
7337bf215546Sopenharmony_ci         if (!(has_writeable_rt(ice->state.cso_blend, fs_info) || writes_depth) &&
7338bf215546Sopenharmony_ci             wm_prog_data->has_side_effects)
7339bf215546Sopenharmony_ci            wm.PSUAVonly = ON;
7340bf215546Sopenharmony_ci#endif
7341bf215546Sopenharmony_ci#endif
7342bf215546Sopenharmony_ci#if GFX_VER >= 7
7343bf215546Sopenharmony_ci      /* BRW_NEW_FS_PROG_DATA */
7344bf215546Sopenharmony_ci         if (wm_prog_data->early_fragment_tests)
7345bf215546Sopenharmony_ci           wm.EarlyDepthStencilControl = EDSC_PREPS;
7346bf215546Sopenharmony_ci         else if (wm_prog_data->has_side_effects)
7347bf215546Sopenharmony_ci           wm.EarlyDepthStencilControl = EDSC_PSEXEC;
7348bf215546Sopenharmony_ci#endif
7349bf215546Sopenharmony_ci#if GFX_VER == 8
7350bf215546Sopenharmony_ci         /* We could skip this bit if color writes are enabled. */
7351bf215546Sopenharmony_ci         if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill)
7352bf215546Sopenharmony_ci            wm.ForceThreadDispatchEnable = ForceON;
7353bf215546Sopenharmony_ci#endif
7354bf215546Sopenharmony_ci      };
7355bf215546Sopenharmony_ci
7356bf215546Sopenharmony_ci#if GFX_VER <= 5
7357bf215546Sopenharmony_ci      if (ice->state.global_depth_offset_clamp != cso->cso.offset_clamp) {
7358bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
7359bf215546Sopenharmony_ci            clamp.GlobalDepthOffsetClamp = cso->cso.offset_clamp;
7360bf215546Sopenharmony_ci         }
7361bf215546Sopenharmony_ci         ice->state.global_depth_offset_clamp = cso->cso.offset_clamp;
7362bf215546Sopenharmony_ci      }
7363bf215546Sopenharmony_ci#endif
7364bf215546Sopenharmony_ci   }
7365bf215546Sopenharmony_ci
7366bf215546Sopenharmony_ci#if GFX_VER >= 7
7367bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN7_SBE) {
7368bf215546Sopenharmony_ci      crocus_emit_sbe(batch, ice);
7369bf215546Sopenharmony_ci   }
7370bf215546Sopenharmony_ci#endif
7371bf215546Sopenharmony_ci
7372bf215546Sopenharmony_ci#if GFX_VER >= 8
7373bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN8_PS_BLEND) {
7374bf215546Sopenharmony_ci      struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];
7375bf215546Sopenharmony_ci      struct crocus_blend_state *cso_blend = ice->state.cso_blend;
7376bf215546Sopenharmony_ci      struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
7377bf215546Sopenharmony_ci      struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
7378bf215546Sopenharmony_ci      const struct shader_info *fs_info =
7379bf215546Sopenharmony_ci         crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
7380bf215546Sopenharmony_ci      uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)];
7381bf215546Sopenharmony_ci      crocus_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) {
7382bf215546Sopenharmony_ci         pb.HasWriteableRT = has_writeable_rt(cso_blend, fs_info);
7383bf215546Sopenharmony_ci         pb.AlphaTestEnable = cso_zsa->cso.alpha_enabled;
7384bf215546Sopenharmony_ci         pb.ColorBufferBlendEnable = (cso_blend->blend_enables & 1) &&
7385bf215546Sopenharmony_ci            (!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend);
7386bf215546Sopenharmony_ci      }
7387bf215546Sopenharmony_ci      crocus_emit_merge(batch, cso_blend->ps_blend, dynamic_pb,
7388bf215546Sopenharmony_ci                        ARRAY_SIZE(cso_blend->ps_blend));
7389bf215546Sopenharmony_ci   }
7390bf215546Sopenharmony_ci#endif
7391bf215546Sopenharmony_ci
7392bf215546Sopenharmony_ci#if GFX_VER >= 6
7393bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL) {
7394bf215546Sopenharmony_ci
7395bf215546Sopenharmony_ci#if GFX_VER >= 8
7396bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
7397bf215546Sopenharmony_ci         set_depth_stencil_bits(ice, &wmds);
7398bf215546Sopenharmony_ci      }
7399bf215546Sopenharmony_ci#else
7400bf215546Sopenharmony_ci      uint32_t ds_offset;
7401bf215546Sopenharmony_ci      void *ds_map = stream_state(batch,
7402bf215546Sopenharmony_ci                                  sizeof(uint32_t) * GENX(DEPTH_STENCIL_STATE_length),
7403bf215546Sopenharmony_ci                                  64, &ds_offset);
7404bf215546Sopenharmony_ci      _crocus_pack_state(batch, GENX(DEPTH_STENCIL_STATE), ds_map, ds) {
7405bf215546Sopenharmony_ci         set_depth_stencil_bits(ice, &ds);
7406bf215546Sopenharmony_ci      }
7407bf215546Sopenharmony_ci
7408bf215546Sopenharmony_ci#if GFX_VER == 6
7409bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
7410bf215546Sopenharmony_ci         ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
7411bf215546Sopenharmony_ci         ptr.DEPTH_STENCIL_STATEChange = true;
7412bf215546Sopenharmony_ci      }
7413bf215546Sopenharmony_ci#else
7414bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
7415bf215546Sopenharmony_ci         ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
7416bf215546Sopenharmony_ci      }
7417bf215546Sopenharmony_ci#endif
7418bf215546Sopenharmony_ci#endif
7419bf215546Sopenharmony_ci   }
7420bf215546Sopenharmony_ci
7421bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN6_SCISSOR_RECT) {
7422bf215546Sopenharmony_ci      /* Align to 64-byte boundary as per anv. */
7423bf215546Sopenharmony_ci      uint32_t scissor_offset;
7424bf215546Sopenharmony_ci      struct pipe_scissor_state *scissor_map = (void *)
7425bf215546Sopenharmony_ci         stream_state(batch, sizeof(struct pipe_scissor_state) * ice->state.num_viewports,
7426bf215546Sopenharmony_ci                      64, &scissor_offset);
7427bf215546Sopenharmony_ci      for (int i = 0; i < ice->state.num_viewports; i++) {
7428bf215546Sopenharmony_ci         struct pipe_scissor_state scissor;
7429bf215546Sopenharmony_ci         crocus_fill_scissor_rect(ice, i, &scissor);
7430bf215546Sopenharmony_ci         scissor_map[i] = scissor;
7431bf215546Sopenharmony_ci      }
7432bf215546Sopenharmony_ci
7433bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
7434bf215546Sopenharmony_ci         ptr.ScissorRectPointer = scissor_offset;
7435bf215546Sopenharmony_ci      }
7436bf215546Sopenharmony_ci   }
7437bf215546Sopenharmony_ci#endif
7438bf215546Sopenharmony_ci
7439bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_DEPTH_BUFFER) {
7440bf215546Sopenharmony_ci      struct isl_device *isl_dev = &batch->screen->isl_dev;
7441bf215546Sopenharmony_ci#if GFX_VER >= 6
7442bf215546Sopenharmony_ci      crocus_emit_depth_stall_flushes(batch);
7443bf215546Sopenharmony_ci#endif
7444bf215546Sopenharmony_ci      void *batch_ptr;
7445bf215546Sopenharmony_ci      struct crocus_resource *zres, *sres;
7446bf215546Sopenharmony_ci      struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
7447bf215546Sopenharmony_ci      batch_ptr = crocus_get_command_space(batch, isl_dev->ds.size);
7448bf215546Sopenharmony_ci
7449bf215546Sopenharmony_ci      struct isl_view view = {
7450bf215546Sopenharmony_ci                              .base_level = 0,
7451bf215546Sopenharmony_ci                              .levels = 1,
7452bf215546Sopenharmony_ci                              .base_array_layer = 0,
7453bf215546Sopenharmony_ci                              .array_len = 1,
7454bf215546Sopenharmony_ci                              .swizzle = ISL_SWIZZLE_IDENTITY,
7455bf215546Sopenharmony_ci      };
7456bf215546Sopenharmony_ci      struct isl_depth_stencil_hiz_emit_info info = {
7457bf215546Sopenharmony_ci         .view = &view,
7458bf215546Sopenharmony_ci         .mocs = crocus_mocs(NULL, isl_dev),
7459bf215546Sopenharmony_ci      };
7460bf215546Sopenharmony_ci
7461bf215546Sopenharmony_ci      if (cso->zsbuf) {
7462bf215546Sopenharmony_ci         crocus_get_depth_stencil_resources(&batch->screen->devinfo, cso->zsbuf->texture, &zres, &sres);
7463bf215546Sopenharmony_ci         struct crocus_surface *zsbuf = (struct crocus_surface *)cso->zsbuf;
7464bf215546Sopenharmony_ci         if (zsbuf->align_res) {
7465bf215546Sopenharmony_ci            zres = (struct crocus_resource *)zsbuf->align_res;
7466bf215546Sopenharmony_ci         }
7467bf215546Sopenharmony_ci         view.base_level = cso->zsbuf->u.tex.level;
7468bf215546Sopenharmony_ci         view.base_array_layer = cso->zsbuf->u.tex.first_layer;
7469bf215546Sopenharmony_ci         view.array_len = cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1;
7470bf215546Sopenharmony_ci
7471bf215546Sopenharmony_ci         if (zres) {
7472bf215546Sopenharmony_ci            view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
7473bf215546Sopenharmony_ci
7474bf215546Sopenharmony_ci            info.depth_surf = &zres->surf;
7475bf215546Sopenharmony_ci            info.depth_address = crocus_command_reloc(batch,
7476bf215546Sopenharmony_ci                                                      (batch_ptr - batch->command.map) + isl_dev->ds.depth_offset,
7477bf215546Sopenharmony_ci                                                      zres->bo, 0, RELOC_32BIT);
7478bf215546Sopenharmony_ci
7479bf215546Sopenharmony_ci            info.mocs = crocus_mocs(zres->bo, isl_dev);
7480bf215546Sopenharmony_ci            view.format = zres->surf.format;
7481bf215546Sopenharmony_ci
7482bf215546Sopenharmony_ci            if (crocus_resource_level_has_hiz(zres, view.base_level)) {
7483bf215546Sopenharmony_ci               info.hiz_usage = zres->aux.usage;
7484bf215546Sopenharmony_ci               info.hiz_surf = &zres->aux.surf;
7485bf215546Sopenharmony_ci               uint64_t hiz_offset = 0;
7486bf215546Sopenharmony_ci
7487bf215546Sopenharmony_ci#if GFX_VER == 6
7488bf215546Sopenharmony_ci               /* HiZ surfaces on Sandy Bridge technically don't support
7489bf215546Sopenharmony_ci                * mip-mapping.  However, we can fake it by offsetting to the
7490bf215546Sopenharmony_ci                * first slice of LOD0 in the HiZ surface.
7491bf215546Sopenharmony_ci                */
7492bf215546Sopenharmony_ci               isl_surf_get_image_offset_B_tile_sa(&zres->aux.surf,
7493bf215546Sopenharmony_ci                                                   view.base_level, 0, 0,
7494bf215546Sopenharmony_ci                                                   &hiz_offset, NULL, NULL);
7495bf215546Sopenharmony_ci#endif
7496bf215546Sopenharmony_ci               info.hiz_address = crocus_command_reloc(batch,
7497bf215546Sopenharmony_ci                                                       (batch_ptr - batch->command.map) + isl_dev->ds.hiz_offset,
7498bf215546Sopenharmony_ci                                                       zres->aux.bo, zres->aux.offset + hiz_offset,
7499bf215546Sopenharmony_ci                                                       RELOC_32BIT);
7500bf215546Sopenharmony_ci               info.depth_clear_value = crocus_resource_get_clear_color(zres).f32[0];
7501bf215546Sopenharmony_ci            }
7502bf215546Sopenharmony_ci         }
7503bf215546Sopenharmony_ci
7504bf215546Sopenharmony_ci#if GFX_VER >= 6
7505bf215546Sopenharmony_ci         if (sres) {
7506bf215546Sopenharmony_ci            view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
7507bf215546Sopenharmony_ci            info.stencil_aux_usage = sres->aux.usage;
7508bf215546Sopenharmony_ci            info.stencil_surf = &sres->surf;
7509bf215546Sopenharmony_ci
7510bf215546Sopenharmony_ci            uint64_t stencil_offset = 0;
7511bf215546Sopenharmony_ci#if GFX_VER == 6
7512bf215546Sopenharmony_ci            /* Stencil surfaces on Sandy Bridge technically don't support
7513bf215546Sopenharmony_ci             * mip-mapping.  However, we can fake it by offsetting to the
7514bf215546Sopenharmony_ci             * first slice of LOD0 in the stencil surface.
7515bf215546Sopenharmony_ci             */
7516bf215546Sopenharmony_ci            isl_surf_get_image_offset_B_tile_sa(&sres->surf,
7517bf215546Sopenharmony_ci                                                view.base_level, 0, 0,
7518bf215546Sopenharmony_ci                                                &stencil_offset, NULL, NULL);
7519bf215546Sopenharmony_ci#endif
7520bf215546Sopenharmony_ci
7521bf215546Sopenharmony_ci            info.stencil_address = crocus_command_reloc(batch,
7522bf215546Sopenharmony_ci                                                        (batch_ptr - batch->command.map) + isl_dev->ds.stencil_offset,
7523bf215546Sopenharmony_ci                                                        sres->bo, stencil_offset, RELOC_32BIT);
7524bf215546Sopenharmony_ci            if (!zres) {
7525bf215546Sopenharmony_ci               view.format = sres->surf.format;
7526bf215546Sopenharmony_ci               info.mocs = crocus_mocs(sres->bo, isl_dev);
7527bf215546Sopenharmony_ci            }
7528bf215546Sopenharmony_ci         }
7529bf215546Sopenharmony_ci#endif
7530bf215546Sopenharmony_ci      }
7531bf215546Sopenharmony_ci      isl_emit_depth_stencil_hiz_s(isl_dev, batch_ptr, &info);
7532bf215546Sopenharmony_ci   }
7533bf215546Sopenharmony_ci
7534bf215546Sopenharmony_ci   /* TODO: Disable emitting this until something uses a stipple. */
7535bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_POLYGON_STIPPLE) {
7536bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
7537bf215546Sopenharmony_ci         for (int i = 0; i < 32; i++) {
7538bf215546Sopenharmony_ci            poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];
7539bf215546Sopenharmony_ci         }
7540bf215546Sopenharmony_ci      }
7541bf215546Sopenharmony_ci   }
7542bf215546Sopenharmony_ci
7543bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_LINE_STIPPLE) {
7544bf215546Sopenharmony_ci      struct crocus_rasterizer_state *cso = ice->state.cso_rast;
7545bf215546Sopenharmony_ci      crocus_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));
7546bf215546Sopenharmony_ci   }
7547bf215546Sopenharmony_ci
7548bf215546Sopenharmony_ci#if GFX_VER >= 8
7549bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN8_VF_TOPOLOGY) {
7550bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
7551bf215546Sopenharmony_ci         topo.PrimitiveTopologyType =
7552bf215546Sopenharmony_ci            translate_prim_type(draw->mode, ice->state.patch_vertices);
7553bf215546Sopenharmony_ci      }
7554bf215546Sopenharmony_ci   }
7555bf215546Sopenharmony_ci#endif
7556bf215546Sopenharmony_ci
7557bf215546Sopenharmony_ci#if GFX_VER <= 5
7558bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN5_PIPELINED_POINTERS) {
7559bf215546Sopenharmony_ci      upload_pipelined_state_pointers(batch, ice->shaders.ff_gs_prog ? true : false, ice->shaders.gs_offset,
7560bf215546Sopenharmony_ci                                      ice->shaders.vs_offset, ice->shaders.sf_offset,
7561bf215546Sopenharmony_ci                                      ice->shaders.clip_offset, ice->shaders.wm_offset, ice->shaders.cc_offset);
7562bf215546Sopenharmony_ci      crocus_upload_urb_fence(batch);
7563bf215546Sopenharmony_ci
7564bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(CS_URB_STATE), cs) {
7565bf215546Sopenharmony_ci        cs.NumberofURBEntries = ice->urb.nr_cs_entries;
7566bf215546Sopenharmony_ci        cs.URBEntryAllocationSize = ice->urb.csize - 1;
7567bf215546Sopenharmony_ci      }
7568bf215546Sopenharmony_ci      dirty |= CROCUS_DIRTY_GEN4_CURBE;
7569bf215546Sopenharmony_ci   }
7570bf215546Sopenharmony_ci#endif
7571bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_DRAWING_RECTANGLE) {
7572bf215546Sopenharmony_ci      struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
7573bf215546Sopenharmony_ci      if (fb->width && fb->height) {
7574bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
7575bf215546Sopenharmony_ci            rect.ClippedDrawingRectangleXMax = fb->width - 1;
7576bf215546Sopenharmony_ci            rect.ClippedDrawingRectangleYMax = fb->height - 1;
7577bf215546Sopenharmony_ci         }
7578bf215546Sopenharmony_ci      }
7579bf215546Sopenharmony_ci   }
7580bf215546Sopenharmony_ci
7581bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_VERTEX_BUFFERS) {
7582bf215546Sopenharmony_ci      const uint32_t user_count = util_bitcount(ice->state.bound_vertex_buffers);
7583bf215546Sopenharmony_ci      const uint32_t count = user_count +
7584bf215546Sopenharmony_ci         ice->state.vs_uses_draw_params + ice->state.vs_uses_derived_draw_params;
7585bf215546Sopenharmony_ci      uint32_t dynamic_bound = ice->state.bound_vertex_buffers;
7586bf215546Sopenharmony_ci
7587bf215546Sopenharmony_ci      if (count) {
7588bf215546Sopenharmony_ci         const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
7589bf215546Sopenharmony_ci
7590bf215546Sopenharmony_ci         uint32_t *map =
7591bf215546Sopenharmony_ci            crocus_get_command_space(batch, 4 * (1 + vb_dwords * count));
7592bf215546Sopenharmony_ci         _crocus_pack_command(batch, GENX(3DSTATE_VERTEX_BUFFERS), map, vb) {
7593bf215546Sopenharmony_ci            vb.DWordLength = (vb_dwords * count + 1) - 2;
7594bf215546Sopenharmony_ci         }
7595bf215546Sopenharmony_ci         map += 1;
7596bf215546Sopenharmony_ci
7597bf215546Sopenharmony_ci         uint32_t bound = dynamic_bound;
7598bf215546Sopenharmony_ci         int i;
7599bf215546Sopenharmony_ci         while (bound) {
7600bf215546Sopenharmony_ci            i = u_bit_scan(&bound);
7601bf215546Sopenharmony_ci            struct pipe_vertex_buffer *buf = &ice->state.vertex_buffers[i];
7602bf215546Sopenharmony_ci            struct crocus_bo *bo = crocus_resource_bo(buf->buffer.resource);
7603bf215546Sopenharmony_ci            uint32_t step_rate = ice->state.cso_vertex_elements->step_rate[i];
7604bf215546Sopenharmony_ci
7605bf215546Sopenharmony_ci            emit_vertex_buffer_state(batch, i, bo,
7606bf215546Sopenharmony_ci                                     buf->buffer_offset,
7607bf215546Sopenharmony_ci                                     ice->state.vb_end[i],
7608bf215546Sopenharmony_ci                                     buf->stride,
7609bf215546Sopenharmony_ci                                     step_rate,
7610bf215546Sopenharmony_ci                                     &map);
7611bf215546Sopenharmony_ci         }
7612bf215546Sopenharmony_ci         i = user_count;
7613bf215546Sopenharmony_ci         if (ice->state.vs_uses_draw_params) {
7614bf215546Sopenharmony_ci            struct crocus_resource *res = (struct crocus_resource *)ice->draw.draw_params.res;
7615bf215546Sopenharmony_ci            emit_vertex_buffer_state(batch, i++,
7616bf215546Sopenharmony_ci                                     res->bo,
7617bf215546Sopenharmony_ci                                     ice->draw.draw_params.offset,
7618bf215546Sopenharmony_ci                                     ice->draw.draw_params.res->width0,
7619bf215546Sopenharmony_ci                                     0, 0, &map);
7620bf215546Sopenharmony_ci         }
7621bf215546Sopenharmony_ci         if (ice->state.vs_uses_derived_draw_params) {
7622bf215546Sopenharmony_ci            struct crocus_resource *res = (struct crocus_resource *)ice->draw.derived_draw_params.res;
7623bf215546Sopenharmony_ci            emit_vertex_buffer_state(batch, i++,
7624bf215546Sopenharmony_ci                                     res->bo,
7625bf215546Sopenharmony_ci                                     ice->draw.derived_draw_params.offset,
7626bf215546Sopenharmony_ci                                     ice->draw.derived_draw_params.res->width0,
7627bf215546Sopenharmony_ci                                     0, 0, &map);
7628bf215546Sopenharmony_ci         }
7629bf215546Sopenharmony_ci      }
7630bf215546Sopenharmony_ci   }
7631bf215546Sopenharmony_ci
7632bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_VERTEX_ELEMENTS) {
7633bf215546Sopenharmony_ci      struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements;
7634bf215546Sopenharmony_ci      const unsigned entries = MAX2(cso->count, 1);
7635bf215546Sopenharmony_ci      if (!(ice->state.vs_needs_sgvs_element ||
7636bf215546Sopenharmony_ci            ice->state.vs_uses_derived_draw_params ||
7637bf215546Sopenharmony_ci            ice->state.vs_needs_edge_flag)) {
7638bf215546Sopenharmony_ci         crocus_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *
7639bf215546Sopenharmony_ci                         (1 + entries * GENX(VERTEX_ELEMENT_STATE_length)));
7640bf215546Sopenharmony_ci      } else {
7641bf215546Sopenharmony_ci         uint32_t dynamic_ves[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
7642bf215546Sopenharmony_ci         const unsigned dyn_count = cso->count +
7643bf215546Sopenharmony_ci            ice->state.vs_needs_sgvs_element +
7644bf215546Sopenharmony_ci            ice->state.vs_uses_derived_draw_params;
7645bf215546Sopenharmony_ci
7646bf215546Sopenharmony_ci         crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS),
7647bf215546Sopenharmony_ci                           &dynamic_ves, ve) {
7648bf215546Sopenharmony_ci            ve.DWordLength =
7649bf215546Sopenharmony_ci               1 + GENX(VERTEX_ELEMENT_STATE_length) * dyn_count - 2;
7650bf215546Sopenharmony_ci         }
7651bf215546Sopenharmony_ci         memcpy(&dynamic_ves[1], &cso->vertex_elements[1],
7652bf215546Sopenharmony_ci                (cso->count - ice->state.vs_needs_edge_flag) *
7653bf215546Sopenharmony_ci                GENX(VERTEX_ELEMENT_STATE_length) * sizeof(uint32_t));
7654bf215546Sopenharmony_ci         uint32_t *ve_pack_dest =
7655bf215546Sopenharmony_ci            &dynamic_ves[1 + (cso->count - ice->state.vs_needs_edge_flag) *
7656bf215546Sopenharmony_ci                         GENX(VERTEX_ELEMENT_STATE_length)];
7657bf215546Sopenharmony_ci
7658bf215546Sopenharmony_ci         if (ice->state.vs_needs_sgvs_element) {
7659bf215546Sopenharmony_ci            uint32_t base_ctrl = ice->state.vs_uses_draw_params ?
7660bf215546Sopenharmony_ci                                 VFCOMP_STORE_SRC : VFCOMP_STORE_0;
7661bf215546Sopenharmony_ci            crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
7662bf215546Sopenharmony_ci               ve.Valid = true;
7663bf215546Sopenharmony_ci               ve.VertexBufferIndex =
7664bf215546Sopenharmony_ci                  util_bitcount64(ice->state.bound_vertex_buffers);
7665bf215546Sopenharmony_ci               ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
7666bf215546Sopenharmony_ci               ve.Component0Control = base_ctrl;
7667bf215546Sopenharmony_ci               ve.Component1Control = base_ctrl;
7668bf215546Sopenharmony_ci#if GFX_VER < 8
7669bf215546Sopenharmony_ci               ve.Component2Control = ice->state.vs_uses_vertexid ? VFCOMP_STORE_VID : VFCOMP_STORE_0;
7670bf215546Sopenharmony_ci               ve.Component3Control = ice->state.vs_uses_instanceid ? VFCOMP_STORE_IID : VFCOMP_STORE_0;
7671bf215546Sopenharmony_ci#else
7672bf215546Sopenharmony_ci               ve.Component2Control = VFCOMP_STORE_0;
7673bf215546Sopenharmony_ci               ve.Component3Control = VFCOMP_STORE_0;
7674bf215546Sopenharmony_ci#endif
7675bf215546Sopenharmony_ci#if GFX_VER < 5
7676bf215546Sopenharmony_ci               ve.DestinationElementOffset = cso->count * 4;
7677bf215546Sopenharmony_ci#endif
7678bf215546Sopenharmony_ci            }
7679bf215546Sopenharmony_ci            ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
7680bf215546Sopenharmony_ci         }
7681bf215546Sopenharmony_ci         if (ice->state.vs_uses_derived_draw_params) {
7682bf215546Sopenharmony_ci            crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
7683bf215546Sopenharmony_ci               ve.Valid = true;
7684bf215546Sopenharmony_ci               ve.VertexBufferIndex =
7685bf215546Sopenharmony_ci                  util_bitcount64(ice->state.bound_vertex_buffers) +
7686bf215546Sopenharmony_ci                  ice->state.vs_uses_draw_params;
7687bf215546Sopenharmony_ci               ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
7688bf215546Sopenharmony_ci               ve.Component0Control = VFCOMP_STORE_SRC;
7689bf215546Sopenharmony_ci               ve.Component1Control = VFCOMP_STORE_SRC;
7690bf215546Sopenharmony_ci               ve.Component2Control = VFCOMP_STORE_0;
7691bf215546Sopenharmony_ci               ve.Component3Control = VFCOMP_STORE_0;
7692bf215546Sopenharmony_ci#if GFX_VER < 5
7693bf215546Sopenharmony_ci               ve.DestinationElementOffset = (cso->count + ice->state.vs_needs_sgvs_element) * 4;
7694bf215546Sopenharmony_ci#endif
7695bf215546Sopenharmony_ci            }
7696bf215546Sopenharmony_ci            ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
7697bf215546Sopenharmony_ci         }
7698bf215546Sopenharmony_ci         if (ice->state.vs_needs_edge_flag) {
7699bf215546Sopenharmony_ci            for (int i = 0; i < GENX(VERTEX_ELEMENT_STATE_length);  i++)
7700bf215546Sopenharmony_ci               ve_pack_dest[i] = cso->edgeflag_ve[i];
7701bf215546Sopenharmony_ci         }
7702bf215546Sopenharmony_ci
7703bf215546Sopenharmony_ci         crocus_batch_emit(batch, &dynamic_ves, sizeof(uint32_t) *
7704bf215546Sopenharmony_ci                         (1 + dyn_count * GENX(VERTEX_ELEMENT_STATE_length)));
7705bf215546Sopenharmony_ci      }
7706bf215546Sopenharmony_ci
7707bf215546Sopenharmony_ci#if GFX_VER == 8
7708bf215546Sopenharmony_ci      if (!ice->state.vs_needs_edge_flag) {
7709bf215546Sopenharmony_ci         crocus_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) *
7710bf215546Sopenharmony_ci                         entries * GENX(3DSTATE_VF_INSTANCING_length));
7711bf215546Sopenharmony_ci      } else {
7712bf215546Sopenharmony_ci         assert(cso->count > 0);
7713bf215546Sopenharmony_ci         const unsigned edgeflag_index = cso->count - 1;
7714bf215546Sopenharmony_ci         uint32_t dynamic_vfi[33 * GENX(3DSTATE_VF_INSTANCING_length)];
7715bf215546Sopenharmony_ci         memcpy(&dynamic_vfi[0], cso->vf_instancing, edgeflag_index *
7716bf215546Sopenharmony_ci                GENX(3DSTATE_VF_INSTANCING_length) * sizeof(uint32_t));
7717bf215546Sopenharmony_ci
7718bf215546Sopenharmony_ci         uint32_t *vfi_pack_dest = &dynamic_vfi[0] +
7719bf215546Sopenharmony_ci            edgeflag_index * GENX(3DSTATE_VF_INSTANCING_length);
7720bf215546Sopenharmony_ci         crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
7721bf215546Sopenharmony_ci            vi.VertexElementIndex = edgeflag_index +
7722bf215546Sopenharmony_ci               ice->state.vs_needs_sgvs_element +
7723bf215546Sopenharmony_ci               ice->state.vs_uses_derived_draw_params;
7724bf215546Sopenharmony_ci         }
7725bf215546Sopenharmony_ci         for (int i = 0; i < GENX(3DSTATE_VF_INSTANCING_length);  i++)
7726bf215546Sopenharmony_ci            vfi_pack_dest[i] |= cso->edgeflag_vfi[i];
7727bf215546Sopenharmony_ci
7728bf215546Sopenharmony_ci         crocus_batch_emit(batch, &dynamic_vfi[0], sizeof(uint32_t) *
7729bf215546Sopenharmony_ci                         entries * GENX(3DSTATE_VF_INSTANCING_length));
7730bf215546Sopenharmony_ci      }
7731bf215546Sopenharmony_ci#endif
7732bf215546Sopenharmony_ci   }
7733bf215546Sopenharmony_ci
7734bf215546Sopenharmony_ci#if GFX_VER == 8
7735bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN8_VF_SGVS) {
7736bf215546Sopenharmony_ci      const struct brw_vs_prog_data *vs_prog_data = (void *)
7737bf215546Sopenharmony_ci         ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
7738bf215546Sopenharmony_ci      struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements;
7739bf215546Sopenharmony_ci
7740bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgv) {
7741bf215546Sopenharmony_ci         if (vs_prog_data->uses_vertexid) {
7742bf215546Sopenharmony_ci            sgv.VertexIDEnable = true;
7743bf215546Sopenharmony_ci            sgv.VertexIDComponentNumber = 2;
7744bf215546Sopenharmony_ci            sgv.VertexIDElementOffset =
7745bf215546Sopenharmony_ci               cso->count - ice->state.vs_needs_edge_flag;
7746bf215546Sopenharmony_ci         }
7747bf215546Sopenharmony_ci
7748bf215546Sopenharmony_ci         if (vs_prog_data->uses_instanceid) {
7749bf215546Sopenharmony_ci            sgv.InstanceIDEnable = true;
7750bf215546Sopenharmony_ci            sgv.InstanceIDComponentNumber = 3;
7751bf215546Sopenharmony_ci            sgv.InstanceIDElementOffset =
7752bf215546Sopenharmony_ci               cso->count - ice->state.vs_needs_edge_flag;
7753bf215546Sopenharmony_ci         }
7754bf215546Sopenharmony_ci      }
7755bf215546Sopenharmony_ci   }
7756bf215546Sopenharmony_ci#endif
7757bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
7758bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN75_VF) {
7759bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(3DSTATE_VF), vf) {
7760bf215546Sopenharmony_ci         if (draw->primitive_restart) {
7761bf215546Sopenharmony_ci            vf.IndexedDrawCutIndexEnable = true;
7762bf215546Sopenharmony_ci            vf.CutIndex = draw->restart_index;
7763bf215546Sopenharmony_ci         }
7764bf215546Sopenharmony_ci      }
7765bf215546Sopenharmony_ci   }
7766bf215546Sopenharmony_ci#endif
7767bf215546Sopenharmony_ci
7768bf215546Sopenharmony_ci#if GFX_VER == 8
7769bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN8_PMA_FIX) {
7770bf215546Sopenharmony_ci      bool enable = want_pma_fix(ice);
7771bf215546Sopenharmony_ci      genX(crocus_update_pma_fix)(ice, batch, enable);
7772bf215546Sopenharmony_ci   }
7773bf215546Sopenharmony_ci#endif
7774bf215546Sopenharmony_ci
7775bf215546Sopenharmony_ci#if GFX_VER <= 5
7776bf215546Sopenharmony_ci   if (dirty & CROCUS_DIRTY_GEN4_CURBE) {
7777bf215546Sopenharmony_ci      gen4_upload_curbe(batch);
7778bf215546Sopenharmony_ci   }
7779bf215546Sopenharmony_ci#endif
7780bf215546Sopenharmony_ci}
7781bf215546Sopenharmony_ci
7782bf215546Sopenharmony_cistatic void
7783bf215546Sopenharmony_cicrocus_upload_render_state(struct crocus_context *ice,
7784bf215546Sopenharmony_ci                           struct crocus_batch *batch,
7785bf215546Sopenharmony_ci                           const struct pipe_draw_info *draw,
7786bf215546Sopenharmony_ci                           unsigned drawid_offset,
7787bf215546Sopenharmony_ci                           const struct pipe_draw_indirect_info *indirect,
7788bf215546Sopenharmony_ci                           const struct pipe_draw_start_count_bias *sc)
7789bf215546Sopenharmony_ci{
7790bf215546Sopenharmony_ci#if GFX_VER >= 7
7791bf215546Sopenharmony_ci   bool use_predicate = ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT;
7792bf215546Sopenharmony_ci#endif
7793bf215546Sopenharmony_ci
7794bf215546Sopenharmony_ci   batch->no_wrap = true;
7795bf215546Sopenharmony_ci   batch->contains_draw = true;
7796bf215546Sopenharmony_ci
7797bf215546Sopenharmony_ci   crocus_update_surface_base_address(batch);
7798bf215546Sopenharmony_ci
7799bf215546Sopenharmony_ci   crocus_upload_dirty_render_state(ice, batch, draw);
7800bf215546Sopenharmony_ci
7801bf215546Sopenharmony_ci   batch->no_wrap = false;
7802bf215546Sopenharmony_ci   if (draw->index_size > 0) {
7803bf215546Sopenharmony_ci      unsigned offset;
7804bf215546Sopenharmony_ci      unsigned size;
7805bf215546Sopenharmony_ci      bool emit_index = false;
7806bf215546Sopenharmony_ci
7807bf215546Sopenharmony_ci      if (draw->has_user_indices) {
7808bf215546Sopenharmony_ci         unsigned start_offset = draw->index_size * sc->start;
7809bf215546Sopenharmony_ci         u_upload_data(ice->ctx.stream_uploader, 0,
7810bf215546Sopenharmony_ci                       sc->count * draw->index_size, 4,
7811bf215546Sopenharmony_ci                       (char *)draw->index.user + start_offset,
7812bf215546Sopenharmony_ci                       &offset, &ice->state.index_buffer.res);
7813bf215546Sopenharmony_ci         offset -= start_offset;
7814bf215546Sopenharmony_ci         size = start_offset + sc->count * draw->index_size;
7815bf215546Sopenharmony_ci         emit_index = true;
7816bf215546Sopenharmony_ci      } else {
7817bf215546Sopenharmony_ci         struct crocus_resource *res = (void *) draw->index.resource;
7818bf215546Sopenharmony_ci
7819bf215546Sopenharmony_ci         if (ice->state.index_buffer.res != draw->index.resource) {
7820bf215546Sopenharmony_ci            res->bind_history |= PIPE_BIND_INDEX_BUFFER;
7821bf215546Sopenharmony_ci            pipe_resource_reference(&ice->state.index_buffer.res,
7822bf215546Sopenharmony_ci                                    draw->index.resource);
7823bf215546Sopenharmony_ci            emit_index = true;
7824bf215546Sopenharmony_ci         }
7825bf215546Sopenharmony_ci         offset = 0;
7826bf215546Sopenharmony_ci         size = draw->index.resource->width0;
7827bf215546Sopenharmony_ci      }
7828bf215546Sopenharmony_ci
7829bf215546Sopenharmony_ci      if (!emit_index &&
7830bf215546Sopenharmony_ci          (ice->state.index_buffer.size != size ||
7831bf215546Sopenharmony_ci           ice->state.index_buffer.index_size != draw->index_size
7832bf215546Sopenharmony_ci#if GFX_VERx10 < 75
7833bf215546Sopenharmony_ci           || ice->state.index_buffer.prim_restart != draw->primitive_restart
7834bf215546Sopenharmony_ci#endif
7835bf215546Sopenharmony_ci	   )
7836bf215546Sopenharmony_ci	  )
7837bf215546Sopenharmony_ci         emit_index = true;
7838bf215546Sopenharmony_ci
7839bf215546Sopenharmony_ci      if (emit_index) {
7840bf215546Sopenharmony_ci         struct crocus_bo *bo = crocus_resource_bo(ice->state.index_buffer.res);
7841bf215546Sopenharmony_ci
7842bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
7843bf215546Sopenharmony_ci#if GFX_VERx10 < 75
7844bf215546Sopenharmony_ci            ib.CutIndexEnable = draw->primitive_restart;
7845bf215546Sopenharmony_ci#endif
7846bf215546Sopenharmony_ci            ib.IndexFormat = draw->index_size >> 1;
7847bf215546Sopenharmony_ci            ib.BufferStartingAddress = ro_bo(bo, offset);
7848bf215546Sopenharmony_ci#if GFX_VER >= 8
7849bf215546Sopenharmony_ci            ib.BufferSize = bo->size - offset;
7850bf215546Sopenharmony_ci#else
7851bf215546Sopenharmony_ci            ib.BufferEndingAddress = ro_bo(bo, offset + size - 1);
7852bf215546Sopenharmony_ci#endif
7853bf215546Sopenharmony_ci#if GFX_VER >= 6
7854bf215546Sopenharmony_ci            ib.MOCS = crocus_mocs(bo, &batch->screen->isl_dev);
7855bf215546Sopenharmony_ci#endif
7856bf215546Sopenharmony_ci         }
7857bf215546Sopenharmony_ci         ice->state.index_buffer.size = size;
7858bf215546Sopenharmony_ci         ice->state.index_buffer.offset = offset;
7859bf215546Sopenharmony_ci         ice->state.index_buffer.index_size = draw->index_size;
7860bf215546Sopenharmony_ci#if GFX_VERx10 < 75
7861bf215546Sopenharmony_ci         ice->state.index_buffer.prim_restart = draw->primitive_restart;
7862bf215546Sopenharmony_ci#endif
7863bf215546Sopenharmony_ci      }
7864bf215546Sopenharmony_ci   }
7865bf215546Sopenharmony_ci
7866bf215546Sopenharmony_ci#define _3DPRIM_END_OFFSET          0x2420
7867bf215546Sopenharmony_ci#define _3DPRIM_START_VERTEX        0x2430
7868bf215546Sopenharmony_ci#define _3DPRIM_VERTEX_COUNT        0x2434
7869bf215546Sopenharmony_ci#define _3DPRIM_INSTANCE_COUNT      0x2438
7870bf215546Sopenharmony_ci#define _3DPRIM_START_INSTANCE      0x243C
7871bf215546Sopenharmony_ci#define _3DPRIM_BASE_VERTEX         0x2440
7872bf215546Sopenharmony_ci
7873bf215546Sopenharmony_ci#if GFX_VER >= 7
7874bf215546Sopenharmony_ci   if (indirect && !indirect->count_from_stream_output) {
7875bf215546Sopenharmony_ci      if (indirect->indirect_draw_count) {
7876bf215546Sopenharmony_ci         use_predicate = true;
7877bf215546Sopenharmony_ci
7878bf215546Sopenharmony_ci         struct crocus_bo *draw_count_bo =
7879bf215546Sopenharmony_ci            crocus_resource_bo(indirect->indirect_draw_count);
7880bf215546Sopenharmony_ci         unsigned draw_count_offset =
7881bf215546Sopenharmony_ci            indirect->indirect_draw_count_offset;
7882bf215546Sopenharmony_ci
7883bf215546Sopenharmony_ci         crocus_emit_pipe_control_flush(batch,
7884bf215546Sopenharmony_ci                                        "ensure indirect draw buffer is flushed",
7885bf215546Sopenharmony_ci                                        PIPE_CONTROL_FLUSH_ENABLE);
7886bf215546Sopenharmony_ci         if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
7887bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
7888bf215546Sopenharmony_ci            struct mi_builder b;
7889bf215546Sopenharmony_ci            mi_builder_init(&b, &batch->screen->devinfo, batch);
7890bf215546Sopenharmony_ci
7891bf215546Sopenharmony_ci            /* comparison = draw id < draw count */
7892bf215546Sopenharmony_ci            struct mi_value comparison =
7893bf215546Sopenharmony_ci               mi_ult(&b, mi_imm(drawid_offset),
7894bf215546Sopenharmony_ci                      mi_mem32(ro_bo(draw_count_bo,
7895bf215546Sopenharmony_ci                                     draw_count_offset)));
7896bf215546Sopenharmony_ci#if GFX_VER == 8
7897bf215546Sopenharmony_ci            /* predicate = comparison & conditional rendering predicate */
7898bf215546Sopenharmony_ci            mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
7899bf215546Sopenharmony_ci                         mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
7900bf215546Sopenharmony_ci#else
7901bf215546Sopenharmony_ci            /* predicate = comparison & conditional rendering predicate */
7902bf215546Sopenharmony_ci            struct mi_value pred = mi_iand(&b, comparison,
7903bf215546Sopenharmony_ci                                           mi_reg32(CS_GPR(15)));
7904bf215546Sopenharmony_ci
7905bf215546Sopenharmony_ci            mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), pred);
7906bf215546Sopenharmony_ci            mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
7907bf215546Sopenharmony_ci
7908bf215546Sopenharmony_ci            unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
7909bf215546Sopenharmony_ci               MI_PREDICATE_COMBINEOP_SET |
7910bf215546Sopenharmony_ci               MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
7911bf215546Sopenharmony_ci
7912bf215546Sopenharmony_ci            crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
7913bf215546Sopenharmony_ci#endif
7914bf215546Sopenharmony_ci#endif
7915bf215546Sopenharmony_ci         } else {
7916bf215546Sopenharmony_ci            uint32_t mi_predicate;
7917bf215546Sopenharmony_ci
7918bf215546Sopenharmony_ci            /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
7919bf215546Sopenharmony_ci            crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, drawid_offset);
7920bf215546Sopenharmony_ci            /* Upload the current draw count from the draw parameters buffer
7921bf215546Sopenharmony_ci             * to MI_PREDICATE_SRC0.
7922bf215546Sopenharmony_ci             */
7923bf215546Sopenharmony_ci            crocus_load_register_mem32(batch, MI_PREDICATE_SRC0,
7924bf215546Sopenharmony_ci                                       draw_count_bo, draw_count_offset);
7925bf215546Sopenharmony_ci            /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
7926bf215546Sopenharmony_ci            crocus_load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0);
7927bf215546Sopenharmony_ci
7928bf215546Sopenharmony_ci            if (drawid_offset == 0) {
7929bf215546Sopenharmony_ci               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
7930bf215546Sopenharmony_ci                  MI_PREDICATE_COMBINEOP_SET |
7931bf215546Sopenharmony_ci                  MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
7932bf215546Sopenharmony_ci            } else {
7933bf215546Sopenharmony_ci               /* While draw_index < draw_count the predicate's result will be
7934bf215546Sopenharmony_ci                *  (draw_index == draw_count) ^ TRUE = TRUE
7935bf215546Sopenharmony_ci                * When draw_index == draw_count the result is
7936bf215546Sopenharmony_ci                *  (TRUE) ^ TRUE = FALSE
7937bf215546Sopenharmony_ci                * After this all results will be:
7938bf215546Sopenharmony_ci                *  (FALSE) ^ FALSE = FALSE
7939bf215546Sopenharmony_ci                */
7940bf215546Sopenharmony_ci               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
7941bf215546Sopenharmony_ci                  MI_PREDICATE_COMBINEOP_XOR |
7942bf215546Sopenharmony_ci                  MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
7943bf215546Sopenharmony_ci            }
7944bf215546Sopenharmony_ci            crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
7945bf215546Sopenharmony_ci         }
7946bf215546Sopenharmony_ci      }
7947bf215546Sopenharmony_ci
7948bf215546Sopenharmony_ci#if GFX_VER >= 7
7949bf215546Sopenharmony_ci      struct crocus_bo *bo = crocus_resource_bo(indirect->buffer);
7950bf215546Sopenharmony_ci      assert(bo);
7951bf215546Sopenharmony_ci
7952bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
7953bf215546Sopenharmony_ci         lrm.RegisterAddress = _3DPRIM_VERTEX_COUNT;
7954bf215546Sopenharmony_ci         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 0);
7955bf215546Sopenharmony_ci      }
7956bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
7957bf215546Sopenharmony_ci         lrm.RegisterAddress = _3DPRIM_INSTANCE_COUNT;
7958bf215546Sopenharmony_ci         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 4);
7959bf215546Sopenharmony_ci      }
7960bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
7961bf215546Sopenharmony_ci         lrm.RegisterAddress = _3DPRIM_START_VERTEX;
7962bf215546Sopenharmony_ci         lrm.MemoryAddress = ro_bo(bo, indirect->offset + 8);
7963bf215546Sopenharmony_ci      }
7964bf215546Sopenharmony_ci      if (draw->index_size) {
7965bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
7966bf215546Sopenharmony_ci            lrm.RegisterAddress = _3DPRIM_BASE_VERTEX;
7967bf215546Sopenharmony_ci            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);
7968bf215546Sopenharmony_ci         }
7969bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
7970bf215546Sopenharmony_ci            lrm.RegisterAddress = _3DPRIM_START_INSTANCE;
7971bf215546Sopenharmony_ci            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 16);
7972bf215546Sopenharmony_ci         }
7973bf215546Sopenharmony_ci      } else {
7974bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
7975bf215546Sopenharmony_ci            lrm.RegisterAddress = _3DPRIM_START_INSTANCE;
7976bf215546Sopenharmony_ci            lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);
7977bf215546Sopenharmony_ci         }
7978bf215546Sopenharmony_ci         crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
7979bf215546Sopenharmony_ci            lri.RegisterOffset = _3DPRIM_BASE_VERTEX;
7980bf215546Sopenharmony_ci            lri.DataDWord = 0;
7981bf215546Sopenharmony_ci         }
7982bf215546Sopenharmony_ci      }
7983bf215546Sopenharmony_ci#endif
7984bf215546Sopenharmony_ci   } else if (indirect && indirect->count_from_stream_output) {
7985bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
7986bf215546Sopenharmony_ci      struct crocus_stream_output_target *so =
7987bf215546Sopenharmony_ci         (void *) indirect->count_from_stream_output;
7988bf215546Sopenharmony_ci
7989bf215546Sopenharmony_ci      /* XXX: Replace with actual cache tracking */
7990bf215546Sopenharmony_ci      crocus_emit_pipe_control_flush(batch,
7991bf215546Sopenharmony_ci                                     "draw count from stream output stall",
7992bf215546Sopenharmony_ci                                     PIPE_CONTROL_CS_STALL);
7993bf215546Sopenharmony_ci
7994bf215546Sopenharmony_ci      struct mi_builder b;
7995bf215546Sopenharmony_ci      mi_builder_init(&b, &batch->screen->devinfo, batch);
7996bf215546Sopenharmony_ci
7997bf215546Sopenharmony_ci      struct crocus_address addr =
7998bf215546Sopenharmony_ci         ro_bo(crocus_resource_bo(&so->offset_res->base.b), so->offset_offset);
7999bf215546Sopenharmony_ci      struct mi_value offset =
8000bf215546Sopenharmony_ci         mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
8001bf215546Sopenharmony_ci
8002bf215546Sopenharmony_ci      mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
8003bf215546Sopenharmony_ci               mi_udiv32_imm(&b, offset, so->stride));
8004bf215546Sopenharmony_ci
8005bf215546Sopenharmony_ci      _crocus_emit_lri(batch, _3DPRIM_START_VERTEX, 0);
8006bf215546Sopenharmony_ci      _crocus_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0);
8007bf215546Sopenharmony_ci      _crocus_emit_lri(batch, _3DPRIM_START_INSTANCE, 0);
8008bf215546Sopenharmony_ci      _crocus_emit_lri(batch, _3DPRIM_INSTANCE_COUNT, draw->instance_count);
8009bf215546Sopenharmony_ci#endif
8010bf215546Sopenharmony_ci   }
8011bf215546Sopenharmony_ci#else
8012bf215546Sopenharmony_ci   assert(!indirect);
8013bf215546Sopenharmony_ci#endif
8014bf215546Sopenharmony_ci
8015bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
8016bf215546Sopenharmony_ci      prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
8017bf215546Sopenharmony_ci#if GFX_VER >= 7
8018bf215546Sopenharmony_ci      prim.PredicateEnable = use_predicate;
8019bf215546Sopenharmony_ci#endif
8020bf215546Sopenharmony_ci
8021bf215546Sopenharmony_ci      prim.PrimitiveTopologyType = translate_prim_type(ice->state.prim_mode, ice->state.patch_vertices);
8022bf215546Sopenharmony_ci      if (indirect) {
8023bf215546Sopenharmony_ci         // XXX Probably have to do something for gen6 here?
8024bf215546Sopenharmony_ci#if GFX_VER >= 7
8025bf215546Sopenharmony_ci         prim.IndirectParameterEnable = true;
8026bf215546Sopenharmony_ci#endif
8027bf215546Sopenharmony_ci      } else {
8028bf215546Sopenharmony_ci#if GFX_VER >= 5
8029bf215546Sopenharmony_ci         prim.StartInstanceLocation = draw->start_instance;
8030bf215546Sopenharmony_ci#endif
8031bf215546Sopenharmony_ci         prim.InstanceCount = draw->instance_count;
8032bf215546Sopenharmony_ci         prim.VertexCountPerInstance = sc->count;
8033bf215546Sopenharmony_ci
8034bf215546Sopenharmony_ci         prim.StartVertexLocation = sc->start;
8035bf215546Sopenharmony_ci
8036bf215546Sopenharmony_ci         if (draw->index_size) {
8037bf215546Sopenharmony_ci            prim.BaseVertexLocation += sc->index_bias;
8038bf215546Sopenharmony_ci         }
8039bf215546Sopenharmony_ci      }
8040bf215546Sopenharmony_ci   }
8041bf215546Sopenharmony_ci}
8042bf215546Sopenharmony_ci
8043bf215546Sopenharmony_ci#if GFX_VER >= 7
8044bf215546Sopenharmony_ci
8045bf215546Sopenharmony_cistatic void
8046bf215546Sopenharmony_cicrocus_upload_compute_state(struct crocus_context *ice,
8047bf215546Sopenharmony_ci                            struct crocus_batch *batch,
8048bf215546Sopenharmony_ci                            const struct pipe_grid_info *grid)
8049bf215546Sopenharmony_ci{
8050bf215546Sopenharmony_ci   const uint64_t stage_dirty = ice->state.stage_dirty;
8051bf215546Sopenharmony_ci   struct crocus_screen *screen = batch->screen;
8052bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &screen->devinfo;
8053bf215546Sopenharmony_ci   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
8054bf215546Sopenharmony_ci   struct crocus_compiled_shader *shader =
8055bf215546Sopenharmony_ci      ice->shaders.prog[MESA_SHADER_COMPUTE];
8056bf215546Sopenharmony_ci   struct brw_stage_prog_data *prog_data = shader->prog_data;
8057bf215546Sopenharmony_ci   struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
8058bf215546Sopenharmony_ci   const struct brw_cs_dispatch_info dispatch =
8059bf215546Sopenharmony_ci      brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);
8060bf215546Sopenharmony_ci
8061bf215546Sopenharmony_ci   crocus_update_surface_base_address(batch);
8062bf215546Sopenharmony_ci   if ((stage_dirty & CROCUS_STAGE_DIRTY_CONSTANTS_CS) && shs->sysvals_need_upload)
8063bf215546Sopenharmony_ci      upload_sysvals(ice, MESA_SHADER_COMPUTE);
8064bf215546Sopenharmony_ci
8065bf215546Sopenharmony_ci   if (stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_CS) {
8066bf215546Sopenharmony_ci      crocus_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
8067bf215546Sopenharmony_ci      ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset =
8068bf215546Sopenharmony_ci         crocus_upload_binding_table(ice, batch,
8069bf215546Sopenharmony_ci                                     ice->shaders.prog[MESA_SHADER_COMPUTE]->surf_offset,
8070bf215546Sopenharmony_ci                                     ice->shaders.prog[MESA_SHADER_COMPUTE]->bt.size_bytes);
8071bf215546Sopenharmony_ci   }
8072bf215546Sopenharmony_ci
8073bf215546Sopenharmony_ci   if (stage_dirty & CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS)
8074bf215546Sopenharmony_ci      crocus_upload_sampler_states(ice, batch, MESA_SHADER_COMPUTE);
8075bf215546Sopenharmony_ci
8076bf215546Sopenharmony_ci   if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) ||
8077bf215546Sopenharmony_ci       cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
8078bf215546Sopenharmony_ci      /* The MEDIA_VFE_STATE documentation for Gen8+ says:
8079bf215546Sopenharmony_ci       *
8080bf215546Sopenharmony_ci       *   "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
8081bf215546Sopenharmony_ci       *    the only bits that are changed are scoreboard related: Scoreboard
8082bf215546Sopenharmony_ci       *    Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta.  For
8083bf215546Sopenharmony_ci       *    these scoreboard related states, a MEDIA_STATE_FLUSH is
8084bf215546Sopenharmony_ci       *    sufficient."
8085bf215546Sopenharmony_ci       */
8086bf215546Sopenharmony_ci      crocus_emit_pipe_control_flush(batch,
8087bf215546Sopenharmony_ci                                     "workaround: stall before MEDIA_VFE_STATE",
8088bf215546Sopenharmony_ci                                     PIPE_CONTROL_CS_STALL);
8089bf215546Sopenharmony_ci
8090bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
8091bf215546Sopenharmony_ci         if (prog_data->total_scratch) {
8092bf215546Sopenharmony_ci            struct crocus_bo *bo =
8093bf215546Sopenharmony_ci               crocus_get_scratch_space(ice, prog_data->total_scratch,
8094bf215546Sopenharmony_ci                                        MESA_SHADER_COMPUTE);
8095bf215546Sopenharmony_ci#if GFX_VER == 8
8096bf215546Sopenharmony_ci            /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
8097bf215546Sopenharmony_ci             * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
8098bf215546Sopenharmony_ci             */
8099bf215546Sopenharmony_ci            vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
8100bf215546Sopenharmony_ci#elif GFX_VERx10 == 75
8101bf215546Sopenharmony_ci            /* Haswell's Per Thread Scratch Space is in the range [0, 10]
8102bf215546Sopenharmony_ci             * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
8103bf215546Sopenharmony_ci             */
8104bf215546Sopenharmony_ci            vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 12;
8105bf215546Sopenharmony_ci#else
8106bf215546Sopenharmony_ci            /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
8107bf215546Sopenharmony_ci             * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
8108bf215546Sopenharmony_ci             */
8109bf215546Sopenharmony_ci            vfe.PerThreadScratchSpace = prog_data->total_scratch / 1024 - 1;
8110bf215546Sopenharmony_ci#endif
8111bf215546Sopenharmony_ci            vfe.ScratchSpaceBasePointer = rw_bo(bo, 0);
8112bf215546Sopenharmony_ci         }
8113bf215546Sopenharmony_ci
8114bf215546Sopenharmony_ci         vfe.MaximumNumberofThreads =
8115bf215546Sopenharmony_ci            devinfo->max_cs_threads * devinfo->subslice_total - 1;
8116bf215546Sopenharmony_ci         vfe.ResetGatewayTimer =
8117bf215546Sopenharmony_ci            Resettingrelativetimerandlatchingtheglobaltimestamp;
8118bf215546Sopenharmony_ci         vfe.BypassGatewayControl = true;
8119bf215546Sopenharmony_ci#if GFX_VER == 7
8120bf215546Sopenharmony_ci         vfe.GPGPUMode = true;
8121bf215546Sopenharmony_ci#endif
8122bf215546Sopenharmony_ci#if GFX_VER == 8
8123bf215546Sopenharmony_ci         vfe.BypassGatewayControl = true;
8124bf215546Sopenharmony_ci#endif
8125bf215546Sopenharmony_ci         vfe.NumberofURBEntries = GFX_VER == 8 ? 2 : 0;
8126bf215546Sopenharmony_ci         vfe.URBEntryAllocationSize = GFX_VER == 8 ? 2 : 0;
8127bf215546Sopenharmony_ci
8128bf215546Sopenharmony_ci         vfe.CURBEAllocationSize =
8129bf215546Sopenharmony_ci            ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
8130bf215546Sopenharmony_ci                  cs_prog_data->push.cross_thread.regs, 2);
8131bf215546Sopenharmony_ci      }
8132bf215546Sopenharmony_ci   }
8133bf215546Sopenharmony_ci
8134bf215546Sopenharmony_ci   /* TODO: Combine subgroup-id with cbuf0 so we can push regular uniforms */
8135bf215546Sopenharmony_ci   if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) ||
8136bf215546Sopenharmony_ci       cs_prog_data->local_size[0] == 0 /* Variable local group size */) {
8137bf215546Sopenharmony_ci      uint32_t curbe_data_offset = 0;
8138bf215546Sopenharmony_ci      assert(cs_prog_data->push.cross_thread.dwords == 0 &&
8139bf215546Sopenharmony_ci             cs_prog_data->push.per_thread.dwords == 1 &&
8140bf215546Sopenharmony_ci             cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
8141bf215546Sopenharmony_ci      const unsigned push_const_size =
8142bf215546Sopenharmony_ci         brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
8143bf215546Sopenharmony_ci      uint32_t *curbe_data_map =
8144bf215546Sopenharmony_ci         stream_state(batch,
8145bf215546Sopenharmony_ci                      ALIGN(push_const_size, 64), 64,
8146bf215546Sopenharmony_ci                      &curbe_data_offset);
8147bf215546Sopenharmony_ci      assert(curbe_data_map);
8148bf215546Sopenharmony_ci      memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
8149bf215546Sopenharmony_ci      crocus_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,
8150bf215546Sopenharmony_ci                                       curbe_data_map);
8151bf215546Sopenharmony_ci
8152bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
8153bf215546Sopenharmony_ci         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
8154bf215546Sopenharmony_ci         curbe.CURBEDataStartAddress = curbe_data_offset;
8155bf215546Sopenharmony_ci      }
8156bf215546Sopenharmony_ci   }
8157bf215546Sopenharmony_ci
8158bf215546Sopenharmony_ci   if (stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS |
8159bf215546Sopenharmony_ci                      CROCUS_STAGE_DIRTY_BINDINGS_CS |
8160bf215546Sopenharmony_ci                      CROCUS_STAGE_DIRTY_CONSTANTS_CS |
8161bf215546Sopenharmony_ci                      CROCUS_STAGE_DIRTY_CS)) {
8162bf215546Sopenharmony_ci      uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
8163bf215546Sopenharmony_ci      const uint64_t ksp = KSP(ice,shader) + brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size);
8164bf215546Sopenharmony_ci      crocus_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
8165bf215546Sopenharmony_ci         idd.KernelStartPointer = ksp;
8166bf215546Sopenharmony_ci         idd.SamplerStatePointer = shs->sampler_offset;
8167bf215546Sopenharmony_ci         idd.BindingTablePointer = ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset;
8168bf215546Sopenharmony_ci         idd.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31);
8169bf215546Sopenharmony_ci         idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
8170bf215546Sopenharmony_ci         idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
8171bf215546Sopenharmony_ci         idd.BarrierEnable = cs_prog_data->uses_barrier;
8172bf215546Sopenharmony_ci         idd.SharedLocalMemorySize = encode_slm_size(GFX_VER,
8173bf215546Sopenharmony_ci                                                     prog_data->total_shared);
8174bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
8175bf215546Sopenharmony_ci         idd.CrossThreadConstantDataReadLength = cs_prog_data->push.cross_thread.regs;
8176bf215546Sopenharmony_ci#endif
8177bf215546Sopenharmony_ci      }
8178bf215546Sopenharmony_ci
8179bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
8180bf215546Sopenharmony_ci         load.InterfaceDescriptorTotalLength =
8181bf215546Sopenharmony_ci            GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
8182bf215546Sopenharmony_ci         load.InterfaceDescriptorDataStartAddress =
8183bf215546Sopenharmony_ci            emit_state(batch, desc, sizeof(desc), 64);
8184bf215546Sopenharmony_ci      }
8185bf215546Sopenharmony_ci   }
8186bf215546Sopenharmony_ci
8187bf215546Sopenharmony_ci#define GPGPU_DISPATCHDIMX 0x2500
8188bf215546Sopenharmony_ci#define GPGPU_DISPATCHDIMY 0x2504
8189bf215546Sopenharmony_ci#define GPGPU_DISPATCHDIMZ 0x2508
8190bf215546Sopenharmony_ci
8191bf215546Sopenharmony_ci   if (grid->indirect) {
8192bf215546Sopenharmony_ci      struct crocus_state_ref *grid_size = &ice->state.grid_size;
8193bf215546Sopenharmony_ci      struct crocus_bo *bo = crocus_resource_bo(grid_size->res);
8194bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
8195bf215546Sopenharmony_ci         lrm.RegisterAddress = GPGPU_DISPATCHDIMX;
8196bf215546Sopenharmony_ci         lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);
8197bf215546Sopenharmony_ci      }
8198bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
8199bf215546Sopenharmony_ci         lrm.RegisterAddress = GPGPU_DISPATCHDIMY;
8200bf215546Sopenharmony_ci         lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);
8201bf215546Sopenharmony_ci      }
8202bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
8203bf215546Sopenharmony_ci         lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;
8204bf215546Sopenharmony_ci         lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);
8205bf215546Sopenharmony_ci      }
8206bf215546Sopenharmony_ci
8207bf215546Sopenharmony_ci#if GFX_VER == 7
8208bf215546Sopenharmony_ci      /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */
8209bf215546Sopenharmony_ci      _crocus_emit_lri(batch, MI_PREDICATE_SRC0 + 4, 0);
8210bf215546Sopenharmony_ci      crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, 0);
8211bf215546Sopenharmony_ci
8212bf215546Sopenharmony_ci      /* Load compute_dispatch_indirect_x_size into SRC0 */
8213bf215546Sopenharmony_ci      crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 0);
8214bf215546Sopenharmony_ci
8215bf215546Sopenharmony_ci      /* predicate = (compute_dispatch_indirect_x_size == 0); */
8216bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8217bf215546Sopenharmony_ci         mip.LoadOperation    = LOAD_LOAD;
8218bf215546Sopenharmony_ci         mip.CombineOperation = COMBINE_SET;
8219bf215546Sopenharmony_ci         mip.CompareOperation = COMPARE_SRCS_EQUAL;
8220bf215546Sopenharmony_ci      };
8221bf215546Sopenharmony_ci
8222bf215546Sopenharmony_ci      /* Load compute_dispatch_indirect_y_size into SRC0 */
8223bf215546Sopenharmony_ci      crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 4);
8224bf215546Sopenharmony_ci
8225bf215546Sopenharmony_ci      /* predicate = (compute_dispatch_indirect_y_size == 0); */
8226bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8227bf215546Sopenharmony_ci         mip.LoadOperation    = LOAD_LOAD;
8228bf215546Sopenharmony_ci         mip.CombineOperation = COMBINE_OR;
8229bf215546Sopenharmony_ci         mip.CompareOperation = COMPARE_SRCS_EQUAL;
8230bf215546Sopenharmony_ci      };
8231bf215546Sopenharmony_ci
8232bf215546Sopenharmony_ci      /* Load compute_dispatch_indirect_z_size into SRC0 */
8233bf215546Sopenharmony_ci      crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 8);
8234bf215546Sopenharmony_ci
8235bf215546Sopenharmony_ci      /* predicate = (compute_dispatch_indirect_z_size == 0); */
8236bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8237bf215546Sopenharmony_ci         mip.LoadOperation    = LOAD_LOAD;
8238bf215546Sopenharmony_ci         mip.CombineOperation = COMBINE_OR;
8239bf215546Sopenharmony_ci         mip.CompareOperation = COMPARE_SRCS_EQUAL;
8240bf215546Sopenharmony_ci      };
8241bf215546Sopenharmony_ci
8242bf215546Sopenharmony_ci      /* predicate = !predicate; */
8243bf215546Sopenharmony_ci#define COMPARE_FALSE                           1
8244bf215546Sopenharmony_ci      crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {
8245bf215546Sopenharmony_ci         mip.LoadOperation    = LOAD_LOADINV;
8246bf215546Sopenharmony_ci         mip.CombineOperation = COMBINE_OR;
8247bf215546Sopenharmony_ci         mip.CompareOperation = COMPARE_FALSE;
8248bf215546Sopenharmony_ci      }
8249bf215546Sopenharmony_ci#endif
8250bf215546Sopenharmony_ci   }
8251bf215546Sopenharmony_ci
8252bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
8253bf215546Sopenharmony_ci      ggw.IndirectParameterEnable    = grid->indirect != NULL;
8254bf215546Sopenharmony_ci      ggw.PredicateEnable            = GFX_VER <= 7 && grid->indirect != NULL;
8255bf215546Sopenharmony_ci      ggw.SIMDSize                   = dispatch.simd_size / 16;
8256bf215546Sopenharmony_ci      ggw.ThreadDepthCounterMaximum  = 0;
8257bf215546Sopenharmony_ci      ggw.ThreadHeightCounterMaximum = 0;
8258bf215546Sopenharmony_ci      ggw.ThreadWidthCounterMaximum  = dispatch.threads - 1;
8259bf215546Sopenharmony_ci      ggw.ThreadGroupIDXDimension    = grid->grid[0];
8260bf215546Sopenharmony_ci      ggw.ThreadGroupIDYDimension    = grid->grid[1];
8261bf215546Sopenharmony_ci      ggw.ThreadGroupIDZDimension    = grid->grid[2];
8262bf215546Sopenharmony_ci      ggw.RightExecutionMask         = dispatch.right_mask;
8263bf215546Sopenharmony_ci      ggw.BottomExecutionMask        = 0xffffffff;
8264bf215546Sopenharmony_ci   }
8265bf215546Sopenharmony_ci
8266bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf);
8267bf215546Sopenharmony_ci
8268bf215546Sopenharmony_ci   batch->contains_draw = true;
8269bf215546Sopenharmony_ci}
8270bf215546Sopenharmony_ci
8271bf215546Sopenharmony_ci#endif /* GFX_VER >= 7 */
8272bf215546Sopenharmony_ci
8273bf215546Sopenharmony_ci/**
8274bf215546Sopenharmony_ci * State module teardown.
8275bf215546Sopenharmony_ci */
8276bf215546Sopenharmony_cistatic void
8277bf215546Sopenharmony_cicrocus_destroy_state(struct crocus_context *ice)
8278bf215546Sopenharmony_ci{
8279bf215546Sopenharmony_ci   pipe_resource_reference(&ice->draw.draw_params.res, NULL);
8280bf215546Sopenharmony_ci   pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL);
8281bf215546Sopenharmony_ci
8282bf215546Sopenharmony_ci   free(ice->state.genx);
8283bf215546Sopenharmony_ci
8284bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++) {
8285bf215546Sopenharmony_ci      pipe_so_target_reference(&ice->state.so_target[i], NULL);
8286bf215546Sopenharmony_ci   }
8287bf215546Sopenharmony_ci
8288bf215546Sopenharmony_ci   for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
8289bf215546Sopenharmony_ci      pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
8290bf215546Sopenharmony_ci   }
8291bf215546Sopenharmony_ci   pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
8292bf215546Sopenharmony_ci
8293bf215546Sopenharmony_ci   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
8294bf215546Sopenharmony_ci      struct crocus_shader_state *shs = &ice->state.shaders[stage];
8295bf215546Sopenharmony_ci      for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
8296bf215546Sopenharmony_ci         pipe_resource_reference(&shs->constbufs[i].buffer, NULL);
8297bf215546Sopenharmony_ci      }
8298bf215546Sopenharmony_ci      for (int i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) {
8299bf215546Sopenharmony_ci         pipe_resource_reference(&shs->image[i].base.resource, NULL);
8300bf215546Sopenharmony_ci      }
8301bf215546Sopenharmony_ci      for (int i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) {
8302bf215546Sopenharmony_ci         pipe_resource_reference(&shs->ssbo[i].buffer, NULL);
8303bf215546Sopenharmony_ci      }
8304bf215546Sopenharmony_ci      for (int i = 0; i < CROCUS_MAX_TEXTURE_SAMPLERS; i++) {
8305bf215546Sopenharmony_ci         pipe_sampler_view_reference((struct pipe_sampler_view **)
8306bf215546Sopenharmony_ci                                     &shs->textures[i], NULL);
8307bf215546Sopenharmony_ci      }
8308bf215546Sopenharmony_ci   }
8309bf215546Sopenharmony_ci
8310bf215546Sopenharmony_ci   for (int i = 0; i < 16; i++)
8311bf215546Sopenharmony_ci      pipe_resource_reference(&ice->state.vertex_buffers[i].buffer.resource, NULL);
8312bf215546Sopenharmony_ci   pipe_resource_reference(&ice->state.grid_size.res, NULL);
8313bf215546Sopenharmony_ci
8314bf215546Sopenharmony_ci   pipe_resource_reference(&ice->state.index_buffer.res, NULL);
8315bf215546Sopenharmony_ci}
8316bf215546Sopenharmony_ci
8317bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */
8318bf215546Sopenharmony_ci
8319bf215546Sopenharmony_cistatic void
8320bf215546Sopenharmony_cicrocus_rebind_buffer(struct crocus_context *ice,
8321bf215546Sopenharmony_ci                     struct crocus_resource *res)
8322bf215546Sopenharmony_ci{
8323bf215546Sopenharmony_ci   struct pipe_context *ctx = &ice->ctx;
8324bf215546Sopenharmony_ci
8325bf215546Sopenharmony_ci   assert(res->base.b.target == PIPE_BUFFER);
8326bf215546Sopenharmony_ci
8327bf215546Sopenharmony_ci   /* Buffers can't be framebuffer attachments, nor display related,
8328bf215546Sopenharmony_ci    * and we don't have upstream Clover support.
8329bf215546Sopenharmony_ci    */
8330bf215546Sopenharmony_ci   assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL |
8331bf215546Sopenharmony_ci                                 PIPE_BIND_RENDER_TARGET |
8332bf215546Sopenharmony_ci                                 PIPE_BIND_BLENDABLE |
8333bf215546Sopenharmony_ci                                 PIPE_BIND_DISPLAY_TARGET |
8334bf215546Sopenharmony_ci                                 PIPE_BIND_CURSOR |
8335bf215546Sopenharmony_ci                                 PIPE_BIND_COMPUTE_RESOURCE |
8336bf215546Sopenharmony_ci                                 PIPE_BIND_GLOBAL)));
8337bf215546Sopenharmony_ci
8338bf215546Sopenharmony_ci   if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) {
8339bf215546Sopenharmony_ci      uint64_t bound_vbs = ice->state.bound_vertex_buffers;
8340bf215546Sopenharmony_ci      while (bound_vbs) {
8341bf215546Sopenharmony_ci         const int i = u_bit_scan64(&bound_vbs);
8342bf215546Sopenharmony_ci         struct pipe_vertex_buffer *buffer = &ice->state.vertex_buffers[i];
8343bf215546Sopenharmony_ci
8344bf215546Sopenharmony_ci         if (!buffer->is_user_buffer && &res->base.b == buffer->buffer.resource)
8345bf215546Sopenharmony_ci            ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS;
8346bf215546Sopenharmony_ci      }
8347bf215546Sopenharmony_ci   }
8348bf215546Sopenharmony_ci
8349bf215546Sopenharmony_ci   if ((res->bind_history & PIPE_BIND_INDEX_BUFFER) &&
8350bf215546Sopenharmony_ci       ice->state.index_buffer.res) {
8351bf215546Sopenharmony_ci      if (res->bo == crocus_resource_bo(ice->state.index_buffer.res))
8352bf215546Sopenharmony_ci         pipe_resource_reference(&ice->state.index_buffer.res, NULL);
8353bf215546Sopenharmony_ci   }
8354bf215546Sopenharmony_ci   /* There is no need to handle these:
8355bf215546Sopenharmony_ci    * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw)
8356bf215546Sopenharmony_ci    * - PIPE_BIND_QUERY_BUFFER (no persistent state references)
8357bf215546Sopenharmony_ci    */
8358bf215546Sopenharmony_ci
8359bf215546Sopenharmony_ci   if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) {
8360bf215546Sopenharmony_ci      /* XXX: be careful about resetting vs appending... */
8361bf215546Sopenharmony_ci      for (int i = 0; i < 4; i++) {
8362bf215546Sopenharmony_ci         if (ice->state.so_target[i] &&
8363bf215546Sopenharmony_ci             (ice->state.so_target[i]->buffer == &res->base.b)) {
8364bf215546Sopenharmony_ci#if GFX_VER == 6
8365bf215546Sopenharmony_ci            ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS;
8366bf215546Sopenharmony_ci#else
8367bf215546Sopenharmony_ci            ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;
8368bf215546Sopenharmony_ci#endif
8369bf215546Sopenharmony_ci         }
8370bf215546Sopenharmony_ci      }
8371bf215546Sopenharmony_ci   }
8372bf215546Sopenharmony_ci
8373bf215546Sopenharmony_ci   for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) {
8374bf215546Sopenharmony_ci      struct crocus_shader_state *shs = &ice->state.shaders[s];
8375bf215546Sopenharmony_ci      enum pipe_shader_type p_stage = stage_to_pipe(s);
8376bf215546Sopenharmony_ci
8377bf215546Sopenharmony_ci      if (!(res->bind_stages & (1 << s)))
8378bf215546Sopenharmony_ci         continue;
8379bf215546Sopenharmony_ci
8380bf215546Sopenharmony_ci      if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
8381bf215546Sopenharmony_ci         /* Skip constant buffer 0, it's for regular uniforms, not UBOs */
8382bf215546Sopenharmony_ci         uint32_t bound_cbufs = shs->bound_cbufs & ~1u;
8383bf215546Sopenharmony_ci         while (bound_cbufs) {
8384bf215546Sopenharmony_ci            const int i = u_bit_scan(&bound_cbufs);
8385bf215546Sopenharmony_ci            struct pipe_constant_buffer *cbuf = &shs->constbufs[i];
8386bf215546Sopenharmony_ci
8387bf215546Sopenharmony_ci            if (res->bo == crocus_resource_bo(cbuf->buffer)) {
8388bf215546Sopenharmony_ci               ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << s;
8389bf215546Sopenharmony_ci            }
8390bf215546Sopenharmony_ci         }
8391bf215546Sopenharmony_ci      }
8392bf215546Sopenharmony_ci
8393bf215546Sopenharmony_ci      if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {
8394bf215546Sopenharmony_ci         uint32_t bound_ssbos = shs->bound_ssbos;
8395bf215546Sopenharmony_ci         while (bound_ssbos) {
8396bf215546Sopenharmony_ci            const int i = u_bit_scan(&bound_ssbos);
8397bf215546Sopenharmony_ci            struct pipe_shader_buffer *ssbo = &shs->ssbo[i];
8398bf215546Sopenharmony_ci
8399bf215546Sopenharmony_ci            if (res->bo == crocus_resource_bo(ssbo->buffer)) {
8400bf215546Sopenharmony_ci               struct pipe_shader_buffer buf = {
8401bf215546Sopenharmony_ci                  .buffer = &res->base.b,
8402bf215546Sopenharmony_ci                  .buffer_offset = ssbo->buffer_offset,
8403bf215546Sopenharmony_ci                  .buffer_size = ssbo->buffer_size,
8404bf215546Sopenharmony_ci               };
8405bf215546Sopenharmony_ci               crocus_set_shader_buffers(ctx, p_stage, i, 1, &buf,
8406bf215546Sopenharmony_ci                                         (shs->writable_ssbos >> i) & 1);
8407bf215546Sopenharmony_ci            }
8408bf215546Sopenharmony_ci         }
8409bf215546Sopenharmony_ci      }
8410bf215546Sopenharmony_ci
8411bf215546Sopenharmony_ci      if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) {
8412bf215546Sopenharmony_ci         uint32_t bound_sampler_views = shs->bound_sampler_views;
8413bf215546Sopenharmony_ci         while (bound_sampler_views) {
8414bf215546Sopenharmony_ci            const int i = u_bit_scan(&bound_sampler_views);
8415bf215546Sopenharmony_ci            struct crocus_sampler_view *isv = shs->textures[i];
8416bf215546Sopenharmony_ci            struct crocus_bo *bo = isv->res->bo;
8417bf215546Sopenharmony_ci
8418bf215546Sopenharmony_ci            if (res->bo == bo) {
8419bf215546Sopenharmony_ci               ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s;
8420bf215546Sopenharmony_ci            }
8421bf215546Sopenharmony_ci         }
8422bf215546Sopenharmony_ci      }
8423bf215546Sopenharmony_ci
8424bf215546Sopenharmony_ci      if (res->bind_history & PIPE_BIND_SHADER_IMAGE) {
8425bf215546Sopenharmony_ci         uint32_t bound_image_views = shs->bound_image_views;
8426bf215546Sopenharmony_ci         while (bound_image_views) {
8427bf215546Sopenharmony_ci            const int i = u_bit_scan(&bound_image_views);
8428bf215546Sopenharmony_ci            struct crocus_image_view *iv = &shs->image[i];
8429bf215546Sopenharmony_ci            struct crocus_bo *bo = crocus_resource_bo(iv->base.resource);
8430bf215546Sopenharmony_ci
8431bf215546Sopenharmony_ci            if (res->bo == bo)
8432bf215546Sopenharmony_ci               ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s;
8433bf215546Sopenharmony_ci         }
8434bf215546Sopenharmony_ci      }
8435bf215546Sopenharmony_ci   }
8436bf215546Sopenharmony_ci}
8437bf215546Sopenharmony_ci
8438bf215546Sopenharmony_ci/* ------------------------------------------------------------------- */
8439bf215546Sopenharmony_ci
8440bf215546Sopenharmony_cistatic unsigned
8441bf215546Sopenharmony_ciflags_to_post_sync_op(uint32_t flags)
8442bf215546Sopenharmony_ci{
8443bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
8444bf215546Sopenharmony_ci      return WriteImmediateData;
8445bf215546Sopenharmony_ci
8446bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
8447bf215546Sopenharmony_ci      return WritePSDepthCount;
8448bf215546Sopenharmony_ci
8449bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
8450bf215546Sopenharmony_ci      return WriteTimestamp;
8451bf215546Sopenharmony_ci
8452bf215546Sopenharmony_ci   return 0;
8453bf215546Sopenharmony_ci}
8454bf215546Sopenharmony_ci
8455bf215546Sopenharmony_ci/*
8456bf215546Sopenharmony_ci * Do the given flags have a Post Sync or LRI Post Sync operation?
8457bf215546Sopenharmony_ci */
8458bf215546Sopenharmony_cistatic enum pipe_control_flags
8459bf215546Sopenharmony_ciget_post_sync_flags(enum pipe_control_flags flags)
8460bf215546Sopenharmony_ci{
8461bf215546Sopenharmony_ci   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
8462bf215546Sopenharmony_ci            PIPE_CONTROL_WRITE_DEPTH_COUNT |
8463bf215546Sopenharmony_ci            PIPE_CONTROL_WRITE_TIMESTAMP |
8464bf215546Sopenharmony_ci            PIPE_CONTROL_LRI_POST_SYNC_OP;
8465bf215546Sopenharmony_ci
8466bf215546Sopenharmony_ci   /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
8467bf215546Sopenharmony_ci    * "LRI Post Sync Operation".  So more than one bit set would be illegal.
8468bf215546Sopenharmony_ci    */
8469bf215546Sopenharmony_ci   assert(util_bitcount(flags) <= 1);
8470bf215546Sopenharmony_ci
8471bf215546Sopenharmony_ci   return flags;
8472bf215546Sopenharmony_ci}
8473bf215546Sopenharmony_ci
/* True when the batch's name is CROCUS_BATCH_COMPUTE.  The argument is
 * parenthesized so that any pointer-valued expression (not just a plain
 * identifier) can be passed safely.
 */
#define IS_COMPUTE_PIPELINE(batch) ((batch)->name == CROCUS_BATCH_COMPUTE)
8475bf215546Sopenharmony_ci
8476bf215546Sopenharmony_ci/**
8477bf215546Sopenharmony_ci * Emit a series of PIPE_CONTROL commands, taking into account any
8478bf215546Sopenharmony_ci * workarounds necessary to actually accomplish the caller's request.
8479bf215546Sopenharmony_ci *
8480bf215546Sopenharmony_ci * Unless otherwise noted, spec quotations in this function come from:
8481bf215546Sopenharmony_ci *
8482bf215546Sopenharmony_ci * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
8483bf215546Sopenharmony_ci * Restrictions for PIPE_CONTROL.
8484bf215546Sopenharmony_ci *
8485bf215546Sopenharmony_ci * You should not use this function directly.  Use the helpers in
8486bf215546Sopenharmony_ci * crocus_pipe_control.c instead, which may split the pipe control further.
8487bf215546Sopenharmony_ci */
8488bf215546Sopenharmony_cistatic void
8489bf215546Sopenharmony_cicrocus_emit_raw_pipe_control(struct crocus_batch *batch,
8490bf215546Sopenharmony_ci                             const char *reason,
8491bf215546Sopenharmony_ci                             uint32_t flags,
8492bf215546Sopenharmony_ci                             struct crocus_bo *bo,
8493bf215546Sopenharmony_ci                             uint32_t offset,
8494bf215546Sopenharmony_ci                             uint64_t imm)
8495bf215546Sopenharmony_ci{
8496bf215546Sopenharmony_ci   UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
8497bf215546Sopenharmony_ci   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
8498bf215546Sopenharmony_ci   UNUSED enum pipe_control_flags non_lri_post_sync_flags =
8499bf215546Sopenharmony_ci      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
8500bf215546Sopenharmony_ci
8501bf215546Sopenharmony_ci   /* Recursive PIPE_CONTROL workarounds --------------------------------
8502bf215546Sopenharmony_ci    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
8503bf215546Sopenharmony_ci    *
8504bf215546Sopenharmony_ci    * We do these first because we want to look at the original operation,
8505bf215546Sopenharmony_ci    * rather than any workarounds we set.
8506bf215546Sopenharmony_ci    */
8507bf215546Sopenharmony_ci
8508bf215546Sopenharmony_ci   /* "Flush Types" workarounds ---------------------------------------------
8509bf215546Sopenharmony_ci    * We do these now because they may add post-sync operations or CS stalls.
8510bf215546Sopenharmony_ci    */
8511bf215546Sopenharmony_ci
8512bf215546Sopenharmony_ci   if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
8513bf215546Sopenharmony_ci      /* Hardware workaround: SNB B-Spec says:
8514bf215546Sopenharmony_ci       *
8515bf215546Sopenharmony_ci       *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
8516bf215546Sopenharmony_ci       *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
8517bf215546Sopenharmony_ci       *     required."
8518bf215546Sopenharmony_ci       */
8519bf215546Sopenharmony_ci      crocus_emit_post_sync_nonzero_flush(batch);
8520bf215546Sopenharmony_ci   }
8521bf215546Sopenharmony_ci
8522bf215546Sopenharmony_ci#if GFX_VER == 8
8523bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
8524bf215546Sopenharmony_ci      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
8525bf215546Sopenharmony_ci       *
8526bf215546Sopenharmony_ci       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
8527bf215546Sopenharmony_ci       *  'Write PS Depth Count' or 'Write Timestamp'."
8528bf215546Sopenharmony_ci       */
8529bf215546Sopenharmony_ci      if (!bo) {
8530bf215546Sopenharmony_ci         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
8531bf215546Sopenharmony_ci         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
8532bf215546Sopenharmony_ci         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
8533bf215546Sopenharmony_ci         bo = batch->ice->workaround_bo;
8534bf215546Sopenharmony_ci         offset = batch->ice->workaround_offset;
8535bf215546Sopenharmony_ci      }
8536bf215546Sopenharmony_ci   }
8537bf215546Sopenharmony_ci#endif
8538bf215546Sopenharmony_ci
8539bf215546Sopenharmony_ci#if GFX_VERx10 < 75
8540bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_DEPTH_STALL) {
8541bf215546Sopenharmony_ci      /* Project: PRE-HSW / Argument: Depth Stall
8542bf215546Sopenharmony_ci       *
8543bf215546Sopenharmony_ci       * "The following bits must be clear:
8544bf215546Sopenharmony_ci       *  - Render Target Cache Flush Enable ([12] of DW1)
8545bf215546Sopenharmony_ci       *  - Depth Cache Flush Enable ([0] of DW1)"
8546bf215546Sopenharmony_ci       */
8547bf215546Sopenharmony_ci      assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
8548bf215546Sopenharmony_ci                        PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
8549bf215546Sopenharmony_ci   }
8550bf215546Sopenharmony_ci#endif
8551bf215546Sopenharmony_ci   if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
8552bf215546Sopenharmony_ci      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
8553bf215546Sopenharmony_ci       *
8554bf215546Sopenharmony_ci       *    "This bit must be DISABLED for operations other than writing
8555bf215546Sopenharmony_ci       *     PS_DEPTH_COUNT."
8556bf215546Sopenharmony_ci       *
8557bf215546Sopenharmony_ci       * This seems like nonsense.  An Ivybridge workaround requires us to
8558bf215546Sopenharmony_ci       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
8559bf215546Sopenharmony_ci       * operation.  Gen8+ requires us to emit depth stalls and depth cache
8560bf215546Sopenharmony_ci       * flushes together.  So, it's hard to imagine this means anything other
8561bf215546Sopenharmony_ci       * than "we originally intended this to be used for PS_DEPTH_COUNT".
8562bf215546Sopenharmony_ci       *
8563bf215546Sopenharmony_ci       * We ignore the supposed restriction and do nothing.
8564bf215546Sopenharmony_ci       */
8565bf215546Sopenharmony_ci   }
8566bf215546Sopenharmony_ci
8567bf215546Sopenharmony_ci   if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
8568bf215546Sopenharmony_ci      /* Project: PRE-HSW / Argument: Depth Cache Flush
8569bf215546Sopenharmony_ci       *
8570bf215546Sopenharmony_ci       * "Depth Stall must be clear ([13] of DW1)."
8571bf215546Sopenharmony_ci       */
8572bf215546Sopenharmony_ci      assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
8573bf215546Sopenharmony_ci   }
8574bf215546Sopenharmony_ci
8575bf215546Sopenharmony_ci   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
8576bf215546Sopenharmony_ci                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
8577bf215546Sopenharmony_ci      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
8578bf215546Sopenharmony_ci       *
8579bf215546Sopenharmony_ci       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
8580bf215546Sopenharmony_ci       *     PS_DEPTH_COUNT or TIMESTAMP queries."
8581bf215546Sopenharmony_ci       *
8582bf215546Sopenharmony_ci       * TODO: Implement end-of-pipe checking.
8583bf215546Sopenharmony_ci       */
8584bf215546Sopenharmony_ci      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
8585bf215546Sopenharmony_ci                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
8586bf215546Sopenharmony_ci   }
8587bf215546Sopenharmony_ci
8588bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) {
8589bf215546Sopenharmony_ci      /* From the PIPE_CONTROL instruction table, bit 1:
8590bf215546Sopenharmony_ci       *
8591bf215546Sopenharmony_ci       *    "This bit is ignored if Depth Stall Enable is set.
8592bf215546Sopenharmony_ci       *     Further, the render cache is not flushed even if Write Cache
8593bf215546Sopenharmony_ci       *     Flush Enable bit is set."
8594bf215546Sopenharmony_ci       *
8595bf215546Sopenharmony_ci       * We assert that the caller doesn't do this combination, to try and
8596bf215546Sopenharmony_ci       * prevent mistakes.  It shouldn't hurt the GPU, though.
8597bf215546Sopenharmony_ci       *
8598bf215546Sopenharmony_ci       * We skip this check on Gen11+ as the "Stall at Pixel Scoreboard"
8599bf215546Sopenharmony_ci       * and "Render Target Flush" combo is explicitly required for BTI
8600bf215546Sopenharmony_ci       * update workarounds.
8601bf215546Sopenharmony_ci       */
8602bf215546Sopenharmony_ci      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
8603bf215546Sopenharmony_ci                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
8604bf215546Sopenharmony_ci   }
8605bf215546Sopenharmony_ci
8606bf215546Sopenharmony_ci   /* PIPE_CONTROL page workarounds ------------------------------------- */
8607bf215546Sopenharmony_ci
8608bf215546Sopenharmony_ci   if (GFX_VER >= 7 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
8609bf215546Sopenharmony_ci      /* From the PIPE_CONTROL page itself:
8610bf215546Sopenharmony_ci       *
8611bf215546Sopenharmony_ci       *    "IVB, HSW, BDW
8612bf215546Sopenharmony_ci       *     Restriction: Pipe_control with CS-stall bit set must be issued
8613bf215546Sopenharmony_ci       *     before a pipe-control command that has the State Cache
8614bf215546Sopenharmony_ci       *     Invalidate bit set."
8615bf215546Sopenharmony_ci       */
8616bf215546Sopenharmony_ci      flags |= PIPE_CONTROL_CS_STALL;
8617bf215546Sopenharmony_ci   }
8618bf215546Sopenharmony_ci
8619bf215546Sopenharmony_ci   if ((GFX_VERx10 == 75)) {
8620bf215546Sopenharmony_ci      /* From the PIPE_CONTROL page itself:
8621bf215546Sopenharmony_ci       *
8622bf215546Sopenharmony_ci       *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
8623bf215546Sopenharmony_ci       *     Prior to programming a PIPECONTROL command with any of the RO
8624bf215546Sopenharmony_ci       *     cache invalidation bit set, program a PIPECONTROL flush command
8625bf215546Sopenharmony_ci       *     with “CS stall” bit and “HDC Flush” bit set."
8626bf215546Sopenharmony_ci       *
8627bf215546Sopenharmony_ci       * TODO: Actually implement this.  What's an HDC Flush?
8628bf215546Sopenharmony_ci       */
8629bf215546Sopenharmony_ci   }
8630bf215546Sopenharmony_ci
8631bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_FLUSH_LLC) {
8632bf215546Sopenharmony_ci      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
8633bf215546Sopenharmony_ci       *
8634bf215546Sopenharmony_ci       *    "Project: ALL
8635bf215546Sopenharmony_ci       *     SW must always program Post-Sync Operation to "Write Immediate
8636bf215546Sopenharmony_ci       *     Data" when Flush LLC is set."
8637bf215546Sopenharmony_ci       *
8638bf215546Sopenharmony_ci       * For now, we just require the caller to do it.
8639bf215546Sopenharmony_ci       */
8640bf215546Sopenharmony_ci      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
8641bf215546Sopenharmony_ci   }
8642bf215546Sopenharmony_ci
8643bf215546Sopenharmony_ci   /* "Post-Sync Operation" workarounds -------------------------------- */
8644bf215546Sopenharmony_ci
8645bf215546Sopenharmony_ci   /* Project: All / Argument: Global Snapshot Count Reset [19]
8646bf215546Sopenharmony_ci    *
8647bf215546Sopenharmony_ci    * "This bit must not be exercised on any product.
8648bf215546Sopenharmony_ci    *  Requires stall bit ([20] of DW1) set."
8649bf215546Sopenharmony_ci    *
8650bf215546Sopenharmony_ci    * We don't use this, so we just assert that it isn't used.  The
8651bf215546Sopenharmony_ci    * PIPE_CONTROL instruction page indicates that they intended this
8652bf215546Sopenharmony_ci    * as a debug feature and don't think it is useful in production,
8653bf215546Sopenharmony_ci    * but it may actually be usable, should we ever want to.
8654bf215546Sopenharmony_ci    */
8655bf215546Sopenharmony_ci   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
8656bf215546Sopenharmony_ci
8657bf215546Sopenharmony_ci   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
8658bf215546Sopenharmony_ci                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
8659bf215546Sopenharmony_ci      /* Project: All / Arguments:
8660bf215546Sopenharmony_ci       *
8661bf215546Sopenharmony_ci       * - Generic Media State Clear [16]
8662bf215546Sopenharmony_ci       * - Indirect State Pointers Disable [16]
8663bf215546Sopenharmony_ci       *
8664bf215546Sopenharmony_ci       *    "Requires stall bit ([20] of DW1) set."
8665bf215546Sopenharmony_ci       *
8666bf215546Sopenharmony_ci       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
8667bf215546Sopenharmony_ci       * State Clear) says:
8668bf215546Sopenharmony_ci       *
8669bf215546Sopenharmony_ci       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
8670bf215546Sopenharmony_ci       *     programmed prior to programming a PIPECONTROL command with "Media
8671bf215546Sopenharmony_ci       *     State Clear" set in GPGPU mode of operation"
8672bf215546Sopenharmony_ci       *
8673bf215546Sopenharmony_ci       * This is a subset of the earlier rule, so there's nothing to do.
8674bf215546Sopenharmony_ci       */
8675bf215546Sopenharmony_ci      flags |= PIPE_CONTROL_CS_STALL;
8676bf215546Sopenharmony_ci   }
8677bf215546Sopenharmony_ci
8678bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
8679bf215546Sopenharmony_ci      /* Project: All / Argument: Store Data Index
8680bf215546Sopenharmony_ci       *
8681bf215546Sopenharmony_ci       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
8682bf215546Sopenharmony_ci       *  than '0'."
8683bf215546Sopenharmony_ci       *
8684bf215546Sopenharmony_ci       * For now, we just assert that the caller does this.  We might want to
8685bf215546Sopenharmony_ci       * automatically add a write to the workaround BO...
8686bf215546Sopenharmony_ci       */
8687bf215546Sopenharmony_ci      assert(non_lri_post_sync_flags != 0);
8688bf215546Sopenharmony_ci   }
8689bf215546Sopenharmony_ci
8690bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_SYNC_GFDT) {
8691bf215546Sopenharmony_ci      /* Project: All / Argument: Sync GFDT
8692bf215546Sopenharmony_ci       *
8693bf215546Sopenharmony_ci       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
8694bf215546Sopenharmony_ci       *  than '0' or 0x2520[13] must be set."
8695bf215546Sopenharmony_ci       *
8696bf215546Sopenharmony_ci       * For now, we just assert that the caller does this.
8697bf215546Sopenharmony_ci       */
8698bf215546Sopenharmony_ci      assert(non_lri_post_sync_flags != 0);
8699bf215546Sopenharmony_ci   }
8700bf215546Sopenharmony_ci
8701bf215546Sopenharmony_ci   if (GFX_VER >= 6 && GFX_VER < 8 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
8702bf215546Sopenharmony_ci      /* Project: SNB, IVB, HSW / Argument: TLB inv
8703bf215546Sopenharmony_ci       *
8704bf215546Sopenharmony_ci       * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
8705bf215546Sopenharmony_ci       *  must be set to something other than '0'."
8706bf215546Sopenharmony_ci       *
8707bf215546Sopenharmony_ci       * For now, we just assert that the caller does this.
8708bf215546Sopenharmony_ci       */
8709bf215546Sopenharmony_ci      assert(non_lri_post_sync_flags != 0);
8710bf215546Sopenharmony_ci   }
8711bf215546Sopenharmony_ci
8712bf215546Sopenharmony_ci   if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
8713bf215546Sopenharmony_ci      /* Project: IVB+ / Argument: TLB inv
8714bf215546Sopenharmony_ci       *
8715bf215546Sopenharmony_ci       *    "Requires stall bit ([20] of DW1) set."
8716bf215546Sopenharmony_ci       *
8717bf215546Sopenharmony_ci       * Also, from the PIPE_CONTROL instruction table:
8718bf215546Sopenharmony_ci       *
8719bf215546Sopenharmony_ci       *    "Project: SKL+
8720bf215546Sopenharmony_ci       *     Post Sync Operation or CS stall must be set to ensure a TLB
8721bf215546Sopenharmony_ci       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
8722bf215546Sopenharmony_ci       *     cache to invalidate."
8723bf215546Sopenharmony_ci       *
8724bf215546Sopenharmony_ci       * This is not a subset of the earlier rule, so there's nothing to do.
8725bf215546Sopenharmony_ci       */
8726bf215546Sopenharmony_ci      flags |= PIPE_CONTROL_CS_STALL;
8727bf215546Sopenharmony_ci   }
8728bf215546Sopenharmony_ci#if GFX_VER == 8
8729bf215546Sopenharmony_ci   if (IS_COMPUTE_PIPELINE(batch)) {
8730bf215546Sopenharmony_ci      if (post_sync_flags ||
8731bf215546Sopenharmony_ci          (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
8732bf215546Sopenharmony_ci                    PIPE_CONTROL_DEPTH_STALL |
8733bf215546Sopenharmony_ci                    PIPE_CONTROL_RENDER_TARGET_FLUSH |
8734bf215546Sopenharmony_ci                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
8735bf215546Sopenharmony_ci                    PIPE_CONTROL_DATA_CACHE_FLUSH))) {
8736bf215546Sopenharmony_ci         /* Project: BDW / Arguments:
8737bf215546Sopenharmony_ci          *
8738bf215546Sopenharmony_ci          * - LRI Post Sync Operation   [23]
8739bf215546Sopenharmony_ci          * - Post Sync Op              [15:14]
8740bf215546Sopenharmony_ci          * - Notify En                 [8]
8741bf215546Sopenharmony_ci          * - Depth Stall               [13]
8742bf215546Sopenharmony_ci          * - Render Target Cache Flush [12]
8743bf215546Sopenharmony_ci          * - Depth Cache Flush         [0]
8744bf215546Sopenharmony_ci          * - DC Flush Enable           [5]
8745bf215546Sopenharmony_ci          *
8746bf215546Sopenharmony_ci          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
8747bf215546Sopenharmony_ci          *     Workloads."
8748bf215546Sopenharmony_ci          *
8749bf215546Sopenharmony_ci          * (The docs have separate table rows for each bit, with essentially
8750bf215546Sopenharmony_ci          * the same workaround text.  We've combined them here.)
8751bf215546Sopenharmony_ci          */
8752bf215546Sopenharmony_ci         flags |= PIPE_CONTROL_CS_STALL;
8753bf215546Sopenharmony_ci
8754bf215546Sopenharmony_ci         /* Also, from the PIPE_CONTROL instruction table, bit 20:
8755bf215546Sopenharmony_ci          *
8756bf215546Sopenharmony_ci          *    "Project: BDW
8757bf215546Sopenharmony_ci          *     This bit must be always set when PIPE_CONTROL command is
8758bf215546Sopenharmony_ci          *     programmed by GPGPU and MEDIA workloads, except for the cases
8759bf215546Sopenharmony_ci          *     when only Read Only Cache Invalidation bits are set (State
8760bf215546Sopenharmony_ci          *     Cache Invalidation Enable, Instruction cache Invalidation
8761bf215546Sopenharmony_ci          *     Enable, Texture Cache Invalidation Enable, Constant Cache
8762bf215546Sopenharmony_ci          *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
8763bf215546Sopenharmony_ci          *     need not implemented when FF_DOP_CG is disable via "Fixed
8764bf215546Sopenharmony_ci          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
8765bf215546Sopenharmony_ci          *
8766bf215546Sopenharmony_ci          * It sounds like we could avoid CS stalls in some cases, but we
8767bf215546Sopenharmony_ci          * don't currently bother.  This list isn't exactly the list above,
8768bf215546Sopenharmony_ci          * either...
8769bf215546Sopenharmony_ci          */
8770bf215546Sopenharmony_ci      }
8771bf215546Sopenharmony_ci   }
8772bf215546Sopenharmony_ci#endif
8773bf215546Sopenharmony_ci   /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
8774bf215546Sopenharmony_ci    *
8775bf215546Sopenharmony_ci    * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
8776bf215546Sopenharmony_ci    *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
8777bf215546Sopenharmony_ci    *
8778bf215546Sopenharmony_ci    * Note that the kernel does CS stalls between batches, so we only need
8779bf215546Sopenharmony_ci    * to count them within a batch.  We currently naively count every 4, and
8780bf215546Sopenharmony_ci    * don't skip the ones with only read-cache-invalidate bits set.  This
8781bf215546Sopenharmony_ci    * may or may not be a problem...
8782bf215546Sopenharmony_ci    */
8783bf215546Sopenharmony_ci   if (GFX_VER == 7 && !(GFX_VERx10 == 75)) {
8784bf215546Sopenharmony_ci      if (flags & PIPE_CONTROL_CS_STALL) {
8785bf215546Sopenharmony_ci         /* If we're doing a CS stall, reset the counter and carry on. */
8786bf215546Sopenharmony_ci         batch->pipe_controls_since_last_cs_stall = 0;
8787bf215546Sopenharmony_ci      }
8788bf215546Sopenharmony_ci
8789bf215546Sopenharmony_ci      /* If this is the fourth pipe control without a CS stall, do one now. */
8790bf215546Sopenharmony_ci      if (++batch->pipe_controls_since_last_cs_stall == 4) {
8791bf215546Sopenharmony_ci         batch->pipe_controls_since_last_cs_stall = 0;
8792bf215546Sopenharmony_ci         flags |= PIPE_CONTROL_CS_STALL;
8793bf215546Sopenharmony_ci      }
8794bf215546Sopenharmony_ci   }
8795bf215546Sopenharmony_ci
8796bf215546Sopenharmony_ci   /* "Stall" workarounds ----------------------------------------------
8797bf215546Sopenharmony_ci    * These have to come after the earlier ones because we may have added
8798bf215546Sopenharmony_ci    * some additional CS stalls above.
8799bf215546Sopenharmony_ci    */
8800bf215546Sopenharmony_ci
8801bf215546Sopenharmony_ci   if (flags & PIPE_CONTROL_CS_STALL) {
8802bf215546Sopenharmony_ci      /* Project: PRE-SKL, VLV, CHV
8803bf215546Sopenharmony_ci       *
8804bf215546Sopenharmony_ci       * "[All Stepping][All SKUs]:
8805bf215546Sopenharmony_ci       *
8806bf215546Sopenharmony_ci       *  One of the following must also be set:
8807bf215546Sopenharmony_ci       *
8808bf215546Sopenharmony_ci       *  - Render Target Cache Flush Enable ([12] of DW1)
8809bf215546Sopenharmony_ci       *  - Depth Cache Flush Enable ([0] of DW1)
8810bf215546Sopenharmony_ci       *  - Stall at Pixel Scoreboard ([1] of DW1)
8811bf215546Sopenharmony_ci       *  - Depth Stall ([13] of DW1)
8812bf215546Sopenharmony_ci       *  - Post-Sync Operation ([13] of DW1)
8813bf215546Sopenharmony_ci       *  - DC Flush Enable ([5] of DW1)"
8814bf215546Sopenharmony_ci       *
8815bf215546Sopenharmony_ci       * If we don't already have one of those bits set, we choose to add
8816bf215546Sopenharmony_ci       * "Stall at Pixel Scoreboard".  Some of the other bits require a
8817bf215546Sopenharmony_ci       * CS stall as a workaround (see above), which would send us into
8818bf215546Sopenharmony_ci       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
8819bf215546Sopenharmony_ci       * appears to be safe, so we choose that.
8820bf215546Sopenharmony_ci       */
8821bf215546Sopenharmony_ci      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
8822bf215546Sopenharmony_ci                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
8823bf215546Sopenharmony_ci                               PIPE_CONTROL_WRITE_IMMEDIATE |
8824bf215546Sopenharmony_ci                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
8825bf215546Sopenharmony_ci                               PIPE_CONTROL_WRITE_TIMESTAMP |
8826bf215546Sopenharmony_ci                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
8827bf215546Sopenharmony_ci                               PIPE_CONTROL_DEPTH_STALL |
8828bf215546Sopenharmony_ci                               PIPE_CONTROL_DATA_CACHE_FLUSH;
8829bf215546Sopenharmony_ci      if (!(flags & wa_bits))
8830bf215546Sopenharmony_ci         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
8831bf215546Sopenharmony_ci   }
8832bf215546Sopenharmony_ci
8833bf215546Sopenharmony_ci   /* Emit --------------------------------------------------------------- */
8834bf215546Sopenharmony_ci
8835bf215546Sopenharmony_ci   if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) {
8836bf215546Sopenharmony_ci      fprintf(stderr,
8837bf215546Sopenharmony_ci              "  PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
8838bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "",
8839bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "",
8840bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "",
8841bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) ? "VF " : "",
8842bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ? "RT " : "",
8843bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE) ? "Const " : "",
8844bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "",
8845bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "",
8846bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "",
8847bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "",
8848bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "",
8849bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "",
8850bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE) ? "Inst " : "",
8851bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_MEDIA_STATE_CLEAR) ? "MediaClear " : "",
8852bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_NOTIFY_ENABLE) ? "Notify " : "",
8853bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) ?
8854bf215546Sopenharmony_ci              "SnapRes" : "",
8855bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE) ?
8856bf215546Sopenharmony_ci              "ISPDis" : "",
8857bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_WRITE_IMMEDIATE) ? "WriteImm " : "",
8858bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) ? "WriteZCount " : "",
8859bf215546Sopenharmony_ci              (flags & PIPE_CONTROL_WRITE_TIMESTAMP) ? "WriteTimestamp " : "",
8860bf215546Sopenharmony_ci              imm, reason);
8861bf215546Sopenharmony_ci   }
8862bf215546Sopenharmony_ci
8863bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
8864bf215546Sopenharmony_ci#if GFX_VER >= 7
8865bf215546Sopenharmony_ci      pc.LRIPostSyncOperation = NoLRIOperation;
8866bf215546Sopenharmony_ci      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
8867bf215546Sopenharmony_ci      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
8868bf215546Sopenharmony_ci#endif
8869bf215546Sopenharmony_ci#if GFX_VER >= 6
8870bf215546Sopenharmony_ci      pc.StoreDataIndex = 0;
8871bf215546Sopenharmony_ci      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
8872bf215546Sopenharmony_ci      pc.GlobalSnapshotCountReset =
8873bf215546Sopenharmony_ci         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
8874bf215546Sopenharmony_ci      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
8875bf215546Sopenharmony_ci      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
8876bf215546Sopenharmony_ci      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
8877bf215546Sopenharmony_ci      pc.RenderTargetCacheFlushEnable =
8878bf215546Sopenharmony_ci         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
8879bf215546Sopenharmony_ci      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
8880bf215546Sopenharmony_ci      pc.StateCacheInvalidationEnable =
8881bf215546Sopenharmony_ci         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
8882bf215546Sopenharmony_ci      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
8883bf215546Sopenharmony_ci      pc.ConstantCacheInvalidationEnable =
8884bf215546Sopenharmony_ci         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
8885bf215546Sopenharmony_ci#else
8886bf215546Sopenharmony_ci      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
8887bf215546Sopenharmony_ci#endif
8888bf215546Sopenharmony_ci      pc.PostSyncOperation = flags_to_post_sync_op(flags);
8889bf215546Sopenharmony_ci      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
8890bf215546Sopenharmony_ci      pc.InstructionCacheInvalidateEnable =
8891bf215546Sopenharmony_ci         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
8892bf215546Sopenharmony_ci      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
8893bf215546Sopenharmony_ci#if GFX_VER >= 5 || GFX_VERx10 == 45
8894bf215546Sopenharmony_ci      pc.IndirectStatePointersDisable =
8895bf215546Sopenharmony_ci         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
8896bf215546Sopenharmony_ci#endif
8897bf215546Sopenharmony_ci#if GFX_VER >= 6
8898bf215546Sopenharmony_ci      pc.TextureCacheInvalidationEnable =
8899bf215546Sopenharmony_ci         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
8900bf215546Sopenharmony_ci#elif GFX_VER == 5 || GFX_VERx10 == 45
8901bf215546Sopenharmony_ci      pc.TextureCacheFlushEnable =
8902bf215546Sopenharmony_ci         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
8903bf215546Sopenharmony_ci#endif
8904bf215546Sopenharmony_ci      pc.Address = ggtt_bo(bo, offset);
8905bf215546Sopenharmony_ci      if (GFX_VER < 7 && bo)
8906bf215546Sopenharmony_ci         pc.DestinationAddressType = DAT_GGTT;
8907bf215546Sopenharmony_ci      pc.ImmediateData = imm;
8908bf215546Sopenharmony_ci   }
8909bf215546Sopenharmony_ci}
8910bf215546Sopenharmony_ci
8911bf215546Sopenharmony_ci#if GFX_VER == 6
8912bf215546Sopenharmony_civoid
8913bf215546Sopenharmony_cigenX(crocus_upload_urb)(struct crocus_batch *batch,
8914bf215546Sopenharmony_ci                        unsigned vs_size,
8915bf215546Sopenharmony_ci                        bool gs_present,
8916bf215546Sopenharmony_ci                        unsigned gs_size)
8917bf215546Sopenharmony_ci{
8918bf215546Sopenharmony_ci   struct crocus_context *ice = batch->ice;
8919bf215546Sopenharmony_ci   int nr_vs_entries, nr_gs_entries;
8920bf215546Sopenharmony_ci   int total_urb_size = ice->urb.size * 1024; /* in bytes */
8921bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &batch->screen->devinfo;
8922bf215546Sopenharmony_ci
8923bf215546Sopenharmony_ci   /* Calculate how many entries fit in each stage's section of the URB */
8924bf215546Sopenharmony_ci   if (gs_present) {
8925bf215546Sopenharmony_ci      nr_vs_entries = (total_urb_size/2) / (vs_size * 128);
8926bf215546Sopenharmony_ci      nr_gs_entries = (total_urb_size/2) / (gs_size * 128);
8927bf215546Sopenharmony_ci   } else {
8928bf215546Sopenharmony_ci      nr_vs_entries = total_urb_size / (vs_size * 128);
8929bf215546Sopenharmony_ci      nr_gs_entries = 0;
8930bf215546Sopenharmony_ci   }
8931bf215546Sopenharmony_ci
8932bf215546Sopenharmony_ci   /* Then clamp to the maximum allowed by the hardware */
8933bf215546Sopenharmony_ci   if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX])
8934bf215546Sopenharmony_ci      nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX];
8935bf215546Sopenharmony_ci
8936bf215546Sopenharmony_ci   if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY])
8937bf215546Sopenharmony_ci      nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY];
8938bf215546Sopenharmony_ci
8939bf215546Sopenharmony_ci   /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */
8940bf215546Sopenharmony_ci   ice->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
8941bf215546Sopenharmony_ci   ice->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);
8942bf215546Sopenharmony_ci
8943bf215546Sopenharmony_ci   assert(ice->urb.nr_vs_entries >=
8944bf215546Sopenharmony_ci          devinfo->urb.min_entries[MESA_SHADER_VERTEX]);
8945bf215546Sopenharmony_ci   assert(ice->urb.nr_vs_entries % 4 == 0);
8946bf215546Sopenharmony_ci   assert(ice->urb.nr_gs_entries % 4 == 0);
8947bf215546Sopenharmony_ci   assert(vs_size <= 5);
8948bf215546Sopenharmony_ci   assert(gs_size <= 5);
8949bf215546Sopenharmony_ci
8950bf215546Sopenharmony_ci   crocus_emit_cmd(batch, GENX(3DSTATE_URB), urb) {
8951bf215546Sopenharmony_ci      urb.VSNumberofURBEntries = ice->urb.nr_vs_entries;
8952bf215546Sopenharmony_ci      urb.VSURBEntryAllocationSize = vs_size - 1;
8953bf215546Sopenharmony_ci
8954bf215546Sopenharmony_ci      urb.GSNumberofURBEntries = ice->urb.nr_gs_entries;
8955bf215546Sopenharmony_ci      urb.GSURBEntryAllocationSize = gs_size - 1;
8956bf215546Sopenharmony_ci   };
8957bf215546Sopenharmony_ci   /* From the PRM Volume 2 part 1, section 1.4.7:
8958bf215546Sopenharmony_ci    *
8959bf215546Sopenharmony_ci    *   Because of a urb corruption caused by allocating a previous gsunit’s
8960bf215546Sopenharmony_ci    *   urb entry to vsunit software is required to send a "GS NULL
8961bf215546Sopenharmony_ci    *   Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus
8962bf215546Sopenharmony_ci    *   a dummy DRAW call before any case where VS will be taking over GS URB
8963bf215546Sopenharmony_ci    *   space.
8964bf215546Sopenharmony_ci    *
8965bf215546Sopenharmony_ci    * It is not clear exactly what this means ("URB fence" is a command that
8966bf215546Sopenharmony_ci    * doesn't exist on Gen6).  So for now we just do a full pipeline flush as
8967bf215546Sopenharmony_ci    * a workaround.
8968bf215546Sopenharmony_ci    */
8969bf215546Sopenharmony_ci   if (ice->urb.gs_present && !gs_present)
8970bf215546Sopenharmony_ci      crocus_emit_mi_flush(batch);
8971bf215546Sopenharmony_ci   ice->urb.gs_present = gs_present;
8972bf215546Sopenharmony_ci}
8973bf215546Sopenharmony_ci#endif
8974bf215546Sopenharmony_ci
/**
 * Hook installed as screen->vtbl.lost_genx_state.
 *
 * This implementation performs no work; both parameters are unused.
 */
static void
crocus_lost_genx_state(struct crocus_context *ice, struct crocus_batch *batch)
{
   (void)ice;
   (void)batch;
}
8979bf215546Sopenharmony_ci
/**
 * Emit MI_REPORT_PERF_COUNT writing to \p bo at \p offset_in_bytes and
 * tagged with \p report_id.
 *
 * Only compiled in for GFX_VER >= 7; on earlier generations nothing is
 * emitted.
 */
static void
crocus_emit_mi_report_perf_count(struct crocus_batch *batch,
                                 struct crocus_bo *bo,
                                 uint32_t offset_in_bytes,
                                 uint32_t report_id)
{
#if GFX_VER < 7
   /* Nothing is emitted on these generations. */
#else
   crocus_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), rpc) {
      rpc.ReportID = report_id;
      rpc.MemoryAddress = rw_bo(bo, offset_in_bytes);
   }
#endif
}
8993bf215546Sopenharmony_ci
8994bf215546Sopenharmony_ci/**
8995bf215546Sopenharmony_ci * From the PRM, Volume 2a:
8996bf215546Sopenharmony_ci *
8997bf215546Sopenharmony_ci *    "Indirect State Pointers Disable
8998bf215546Sopenharmony_ci *
8999bf215546Sopenharmony_ci *    At the completion of the post-sync operation associated with this pipe
9000bf215546Sopenharmony_ci *    control packet, the indirect state pointers in the hardware are
9001bf215546Sopenharmony_ci *    considered invalid; the indirect pointers are not saved in the context.
9002bf215546Sopenharmony_ci *    If any new indirect state commands are executed in the command stream
9003bf215546Sopenharmony_ci *    while the pipe control is pending, the new indirect state commands are
9004bf215546Sopenharmony_ci *    preserved.
9005bf215546Sopenharmony_ci *
9006bf215546Sopenharmony_ci *    [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context
9007bf215546Sopenharmony_ci *    restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant
9008bf215546Sopenharmony_ci *    commands are only considered as Indirect State Pointers. Once ISP is
9009bf215546Sopenharmony_ci *    issued in a context, SW must initialize by programming push constant
9010bf215546Sopenharmony_ci *    commands for all the shaders (at least to zero length) before attempting
9011bf215546Sopenharmony_ci *    any rendering operation for the same context."
9012bf215546Sopenharmony_ci *
9013bf215546Sopenharmony_ci * 3DSTATE_CONSTANT_* packets are restored during a context restore,
9014bf215546Sopenharmony_ci * even though they point to a BO that has been already unreferenced at
9015bf215546Sopenharmony_ci * the end of the previous batch buffer. This has been fine so far since
 * we are protected by the scratch page (every address not covered by
9017bf215546Sopenharmony_ci * a BO should be pointing to the scratch page). But on CNL, it is
9018bf215546Sopenharmony_ci * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*
9019bf215546Sopenharmony_ci * instruction.
9020bf215546Sopenharmony_ci *
9021bf215546Sopenharmony_ci * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the
9022bf215546Sopenharmony_ci * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a
9023bf215546Sopenharmony_ci * context restore, so the mentioned hang doesn't happen. However,
9024bf215546Sopenharmony_ci * software must program push constant commands for all stages prior to
9025bf215546Sopenharmony_ci * rendering anything, so we flag them as dirty.
9026bf215546Sopenharmony_ci *
 * Finally, we also stall at the pixel scoreboard to make sure the constants
 * have been loaded into the EUs prior to disabling the push constants, so
 * that doing so doesn't hang a previous 3DPRIMITIVE.
9030bf215546Sopenharmony_ci */
9031bf215546Sopenharmony_ci#if GFX_VER >= 7
9032bf215546Sopenharmony_cistatic void
9033bf215546Sopenharmony_cigen7_emit_isp_disable(struct crocus_batch *batch)
9034bf215546Sopenharmony_ci{
9035bf215546Sopenharmony_ci   crocus_emit_raw_pipe_control(batch, "isp disable",
9036bf215546Sopenharmony_ci                                PIPE_CONTROL_STALL_AT_SCOREBOARD |
9037bf215546Sopenharmony_ci                                PIPE_CONTROL_CS_STALL,
9038bf215546Sopenharmony_ci                                NULL, 0, 0);
9039bf215546Sopenharmony_ci   crocus_emit_raw_pipe_control(batch, "isp disable",
9040bf215546Sopenharmony_ci                                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
9041bf215546Sopenharmony_ci                                PIPE_CONTROL_CS_STALL,
9042bf215546Sopenharmony_ci                                NULL, 0, 0);
9043bf215546Sopenharmony_ci
9044bf215546Sopenharmony_ci   struct crocus_context *ice = batch->ice;
9045bf215546Sopenharmony_ci   ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_CONSTANTS_VS |
9046bf215546Sopenharmony_ci                              CROCUS_STAGE_DIRTY_CONSTANTS_TCS |
9047bf215546Sopenharmony_ci                              CROCUS_STAGE_DIRTY_CONSTANTS_TES |
9048bf215546Sopenharmony_ci                              CROCUS_STAGE_DIRTY_CONSTANTS_GS |
9049bf215546Sopenharmony_ci                              CROCUS_STAGE_DIRTY_CONSTANTS_FS);
9050bf215546Sopenharmony_ci}
9051bf215546Sopenharmony_ci#endif
9052bf215546Sopenharmony_ci
9053bf215546Sopenharmony_ci#if GFX_VER >= 7
/**
 * Emit the commands that close out a batch.
 *
 * When built for GFX_VERx10 == 75 and \p batch is the render batch, this
 * first emits an MI flush, re-emits 3DSTATE_CC_STATE_POINTERS with the
 * context's current cc_offset, and then issues a render-target flush with a
 * CS stall (labelled "hsw wa").  In all cases it finishes by disabling the
 * indirect state pointers.
 */
static void
crocus_state_finish_batch(struct crocus_batch *batch)
{
#if GFX_VERx10 == 75
   const bool is_render_batch = batch->name == CROCUS_BATCH_RENDER;

   if (is_render_batch) {
      crocus_emit_mi_flush(batch);

      crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc_ptr) {
         cc_ptr.ColorCalcStatePointer = batch->ice->shaders.cc_offset;
      }

      crocus_emit_pipe_control_flush(batch, "hsw wa",
                                     PIPE_CONTROL_CS_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH);
   }
#endif

   gen7_emit_isp_disable(batch);
}
9070bf215546Sopenharmony_ci#endif
9071bf215546Sopenharmony_ci
9072bf215546Sopenharmony_cistatic void
9073bf215546Sopenharmony_cicrocus_batch_reset_dirty(struct crocus_batch *batch)
9074bf215546Sopenharmony_ci{
9075bf215546Sopenharmony_ci   /* unreference any index buffer so it get reemitted. */
9076bf215546Sopenharmony_ci   pipe_resource_reference(&batch->ice->state.index_buffer.res, NULL);
9077bf215546Sopenharmony_ci
9078bf215546Sopenharmony_ci   /* for GEN4/5 need to reemit anything that ends up in the state batch that points to anything in the state batch
9079bf215546Sopenharmony_ci    * as the old state batch won't still be available.
9080bf215546Sopenharmony_ci    */
9081bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER |
9082bf215546Sopenharmony_ci      CROCUS_DIRTY_COLOR_CALC_STATE;
9083bf215546Sopenharmony_ci
9084bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS;
9085bf215546Sopenharmony_ci
9086bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
9087bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS;
9088bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES;
9089bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS;
9090bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS;
9091bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS;
9092bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS;
9093bf215546Sopenharmony_ci
9094bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS;
9095bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TES;
9096bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
9097bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_GS;
9098bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_FS;
9099bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS;
9100bf215546Sopenharmony_ci
9101bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;
9102bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS;
9103bf215546Sopenharmony_ci   batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CS;
9104bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT | CROCUS_DIRTY_SF_CL_VIEWPORT;
9105bf215546Sopenharmony_ci
9106bf215546Sopenharmony_ci#if GFX_VER >= 6
9107bf215546Sopenharmony_ci   /* SCISSOR_STATE */
9108bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;
9109bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;
9110bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL;
9111bf215546Sopenharmony_ci
9112bf215546Sopenharmony_ci#endif
9113bf215546Sopenharmony_ci#if GFX_VER <= 5
9114bf215546Sopenharmony_ci   /* dirty the SF state on gen4/5 */
9115bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_RASTER;
9116bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;
9117bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_CLIP;
9118bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_WM;
9119bf215546Sopenharmony_ci#endif
9120bf215546Sopenharmony_ci#if GFX_VER >= 7
9121bf215546Sopenharmony_ci   /* Streamout dirty */
9122bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;
9123bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST;
9124bf215546Sopenharmony_ci   batch->ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;
9125bf215546Sopenharmony_ci#endif
9126bf215546Sopenharmony_ci}
9127bf215546Sopenharmony_ci
9128bf215546Sopenharmony_ci#if GFX_VERx10 == 75
9129bf215546Sopenharmony_cistruct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ice)
9130bf215546Sopenharmony_ci{
9131bf215546Sopenharmony_ci   return &ice->state.cso_rast->cso;
9132bf215546Sopenharmony_ci}
9133bf215546Sopenharmony_ci#endif
9134bf215546Sopenharmony_ci
9135bf215546Sopenharmony_ci#if GFX_VER >= 6
9136bf215546Sopenharmony_cistatic void update_so_strides(struct crocus_context *ice,
9137bf215546Sopenharmony_ci                              uint16_t *strides)
9138bf215546Sopenharmony_ci{
9139bf215546Sopenharmony_ci   for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
9140bf215546Sopenharmony_ci      struct crocus_stream_output_target *so = (void *)ice->state.so_target[i];
9141bf215546Sopenharmony_ci      if (so)
9142bf215546Sopenharmony_ci         so->stride = strides[i] * sizeof(uint32_t);
9143bf215546Sopenharmony_ci   }
9144bf215546Sopenharmony_ci}
9145bf215546Sopenharmony_ci#endif
9146bf215546Sopenharmony_ci
9147bf215546Sopenharmony_cistatic void crocus_fill_clamp_mask(const struct crocus_sampler_state *samp,
9148bf215546Sopenharmony_ci                                   int s,
9149bf215546Sopenharmony_ci                                   uint32_t *clamp_mask)
9150bf215546Sopenharmony_ci{
9151bf215546Sopenharmony_ci#if GFX_VER < 8
9152bf215546Sopenharmony_ci   if (samp->pstate.min_img_filter != PIPE_TEX_FILTER_NEAREST &&
9153bf215546Sopenharmony_ci       samp->pstate.mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
9154bf215546Sopenharmony_ci      if (samp->pstate.wrap_s == PIPE_TEX_WRAP_CLAMP)
9155bf215546Sopenharmony_ci         clamp_mask[0] |= (1 << s);
9156bf215546Sopenharmony_ci      if (samp->pstate.wrap_t == PIPE_TEX_WRAP_CLAMP)
9157bf215546Sopenharmony_ci         clamp_mask[1] |= (1 << s);
9158bf215546Sopenharmony_ci      if (samp->pstate.wrap_r == PIPE_TEX_WRAP_CLAMP)
9159bf215546Sopenharmony_ci         clamp_mask[2] |= (1 << s);
9160bf215546Sopenharmony_ci   }
9161bf215546Sopenharmony_ci#endif
9162bf215546Sopenharmony_ci}
9163bf215546Sopenharmony_ci
9164bf215546Sopenharmony_cistatic void
9165bf215546Sopenharmony_cicrocus_set_frontend_noop(struct pipe_context *ctx, bool enable)
9166bf215546Sopenharmony_ci{
9167bf215546Sopenharmony_ci   struct crocus_context *ice = (struct crocus_context *) ctx;
9168bf215546Sopenharmony_ci
9169bf215546Sopenharmony_ci   if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_RENDER], enable)) {
9170bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER;
9171bf215546Sopenharmony_ci      ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
9172bf215546Sopenharmony_ci   }
9173bf215546Sopenharmony_ci
9174bf215546Sopenharmony_ci   if (ice->batch_count == 1)
9175bf215546Sopenharmony_ci      return;
9176bf215546Sopenharmony_ci
9177bf215546Sopenharmony_ci   if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_COMPUTE], enable)) {
9178bf215546Sopenharmony_ci      ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE;
9179bf215546Sopenharmony_ci      ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;
9180bf215546Sopenharmony_ci   }
9181bf215546Sopenharmony_ci}
9182bf215546Sopenharmony_ci
9183bf215546Sopenharmony_civoid
9184bf215546Sopenharmony_cigenX(crocus_init_screen_state)(struct crocus_screen *screen)
9185bf215546Sopenharmony_ci{
9186bf215546Sopenharmony_ci   assert(screen->devinfo.verx10 == GFX_VERx10);
9187bf215546Sopenharmony_ci   assert(screen->devinfo.ver == GFX_VER);
9188bf215546Sopenharmony_ci   screen->vtbl.destroy_state = crocus_destroy_state;
9189bf215546Sopenharmony_ci   screen->vtbl.init_render_context = crocus_init_render_context;
9190bf215546Sopenharmony_ci   screen->vtbl.upload_render_state = crocus_upload_render_state;
9191bf215546Sopenharmony_ci#if GFX_VER >= 7
9192bf215546Sopenharmony_ci   screen->vtbl.init_compute_context = crocus_init_compute_context;
9193bf215546Sopenharmony_ci   screen->vtbl.upload_compute_state = crocus_upload_compute_state;
9194bf215546Sopenharmony_ci#endif
9195bf215546Sopenharmony_ci   screen->vtbl.emit_raw_pipe_control = crocus_emit_raw_pipe_control;
9196bf215546Sopenharmony_ci   screen->vtbl.emit_mi_report_perf_count = crocus_emit_mi_report_perf_count;
9197bf215546Sopenharmony_ci   screen->vtbl.rebind_buffer = crocus_rebind_buffer;
9198bf215546Sopenharmony_ci#if GFX_VERx10 >= 75
9199bf215546Sopenharmony_ci   screen->vtbl.load_register_reg32 = crocus_load_register_reg32;
9200bf215546Sopenharmony_ci   screen->vtbl.load_register_reg64 = crocus_load_register_reg64;
9201bf215546Sopenharmony_ci   screen->vtbl.load_register_imm32 = crocus_load_register_imm32;
9202bf215546Sopenharmony_ci   screen->vtbl.load_register_imm64 = crocus_load_register_imm64;
9203bf215546Sopenharmony_ci   screen->vtbl.store_data_imm32 = crocus_store_data_imm32;
9204bf215546Sopenharmony_ci   screen->vtbl.store_data_imm64 = crocus_store_data_imm64;
9205bf215546Sopenharmony_ci#endif
9206bf215546Sopenharmony_ci#if GFX_VER >= 7
9207bf215546Sopenharmony_ci   screen->vtbl.load_register_mem32 = crocus_load_register_mem32;
9208bf215546Sopenharmony_ci   screen->vtbl.load_register_mem64 = crocus_load_register_mem64;
9209bf215546Sopenharmony_ci   screen->vtbl.copy_mem_mem = crocus_copy_mem_mem;
9210bf215546Sopenharmony_ci   screen->vtbl.create_so_decl_list = crocus_create_so_decl_list;
9211bf215546Sopenharmony_ci#endif
9212bf215546Sopenharmony_ci   screen->vtbl.update_surface_base_address = crocus_update_surface_base_address;
9213bf215546Sopenharmony_ci#if GFX_VER >= 6
9214bf215546Sopenharmony_ci   screen->vtbl.store_register_mem32 = crocus_store_register_mem32;
9215bf215546Sopenharmony_ci   screen->vtbl.store_register_mem64 = crocus_store_register_mem64;
9216bf215546Sopenharmony_ci#endif
9217bf215546Sopenharmony_ci   screen->vtbl.populate_vs_key = crocus_populate_vs_key;
9218bf215546Sopenharmony_ci   screen->vtbl.populate_tcs_key = crocus_populate_tcs_key;
9219bf215546Sopenharmony_ci   screen->vtbl.populate_tes_key = crocus_populate_tes_key;
9220bf215546Sopenharmony_ci   screen->vtbl.populate_gs_key = crocus_populate_gs_key;
9221bf215546Sopenharmony_ci   screen->vtbl.populate_fs_key = crocus_populate_fs_key;
9222bf215546Sopenharmony_ci   screen->vtbl.populate_cs_key = crocus_populate_cs_key;
9223bf215546Sopenharmony_ci   screen->vtbl.lost_genx_state = crocus_lost_genx_state;
9224bf215546Sopenharmony_ci#if GFX_VER >= 7
9225bf215546Sopenharmony_ci   screen->vtbl.finish_batch = crocus_state_finish_batch;
9226bf215546Sopenharmony_ci#endif
9227bf215546Sopenharmony_ci#if GFX_VER <= 5
9228bf215546Sopenharmony_ci   screen->vtbl.upload_urb_fence = crocus_upload_urb_fence;
9229bf215546Sopenharmony_ci   screen->vtbl.calculate_urb_fence = crocus_calculate_urb_fence;
9230bf215546Sopenharmony_ci#endif
9231bf215546Sopenharmony_ci   screen->vtbl.fill_clamp_mask = crocus_fill_clamp_mask;
9232bf215546Sopenharmony_ci   screen->vtbl.batch_reset_dirty = crocus_batch_reset_dirty;
9233bf215546Sopenharmony_ci   screen->vtbl.translate_prim_type = translate_prim_type;
9234bf215546Sopenharmony_ci#if GFX_VER >= 6
9235bf215546Sopenharmony_ci   screen->vtbl.update_so_strides = update_so_strides;
9236bf215546Sopenharmony_ci   screen->vtbl.get_so_offset = crocus_get_so_offset;
9237bf215546Sopenharmony_ci#endif
9238bf215546Sopenharmony_ci
9239bf215546Sopenharmony_ci   genX(crocus_init_blt)(screen);
9240bf215546Sopenharmony_ci}
9241bf215546Sopenharmony_ci
9242bf215546Sopenharmony_civoid
9243bf215546Sopenharmony_cigenX(crocus_init_state)(struct crocus_context *ice)
9244bf215546Sopenharmony_ci{
9245bf215546Sopenharmony_ci   struct pipe_context *ctx = &ice->ctx;
9246bf215546Sopenharmony_ci
9247bf215546Sopenharmony_ci   ctx->create_blend_state = crocus_create_blend_state;
9248bf215546Sopenharmony_ci   ctx->create_depth_stencil_alpha_state = crocus_create_zsa_state;
9249bf215546Sopenharmony_ci   ctx->create_rasterizer_state = crocus_create_rasterizer_state;
9250bf215546Sopenharmony_ci   ctx->create_sampler_state = crocus_create_sampler_state;
9251bf215546Sopenharmony_ci   ctx->create_sampler_view = crocus_create_sampler_view;
9252bf215546Sopenharmony_ci   ctx->create_surface = crocus_create_surface;
9253bf215546Sopenharmony_ci   ctx->create_vertex_elements_state = crocus_create_vertex_elements;
9254bf215546Sopenharmony_ci   ctx->bind_blend_state = crocus_bind_blend_state;
9255bf215546Sopenharmony_ci   ctx->bind_depth_stencil_alpha_state = crocus_bind_zsa_state;
9256bf215546Sopenharmony_ci   ctx->bind_sampler_states = crocus_bind_sampler_states;
9257bf215546Sopenharmony_ci   ctx->bind_rasterizer_state = crocus_bind_rasterizer_state;
9258bf215546Sopenharmony_ci   ctx->bind_vertex_elements_state = crocus_bind_vertex_elements_state;
9259bf215546Sopenharmony_ci   ctx->delete_blend_state = crocus_delete_state;
9260bf215546Sopenharmony_ci   ctx->delete_depth_stencil_alpha_state = crocus_delete_state;
9261bf215546Sopenharmony_ci   ctx->delete_rasterizer_state = crocus_delete_state;
9262bf215546Sopenharmony_ci   ctx->delete_sampler_state = crocus_delete_state;
9263bf215546Sopenharmony_ci   ctx->delete_vertex_elements_state = crocus_delete_state;
9264bf215546Sopenharmony_ci   ctx->set_blend_color = crocus_set_blend_color;
9265bf215546Sopenharmony_ci   ctx->set_clip_state = crocus_set_clip_state;
9266bf215546Sopenharmony_ci   ctx->set_constant_buffer = crocus_set_constant_buffer;
9267bf215546Sopenharmony_ci   ctx->set_shader_buffers = crocus_set_shader_buffers;
9268bf215546Sopenharmony_ci   ctx->set_shader_images = crocus_set_shader_images;
9269bf215546Sopenharmony_ci   ctx->set_sampler_views = crocus_set_sampler_views;
9270bf215546Sopenharmony_ci   ctx->set_tess_state = crocus_set_tess_state;
9271bf215546Sopenharmony_ci   ctx->set_patch_vertices = crocus_set_patch_vertices;
9272bf215546Sopenharmony_ci   ctx->set_framebuffer_state = crocus_set_framebuffer_state;
9273bf215546Sopenharmony_ci   ctx->set_polygon_stipple = crocus_set_polygon_stipple;
9274bf215546Sopenharmony_ci   ctx->set_sample_mask = crocus_set_sample_mask;
9275bf215546Sopenharmony_ci   ctx->set_scissor_states = crocus_set_scissor_states;
9276bf215546Sopenharmony_ci   ctx->set_stencil_ref = crocus_set_stencil_ref;
9277bf215546Sopenharmony_ci   ctx->set_vertex_buffers = crocus_set_vertex_buffers;
9278bf215546Sopenharmony_ci   ctx->set_viewport_states = crocus_set_viewport_states;
9279bf215546Sopenharmony_ci   ctx->sampler_view_destroy = crocus_sampler_view_destroy;
9280bf215546Sopenharmony_ci   ctx->surface_destroy = crocus_surface_destroy;
9281bf215546Sopenharmony_ci   ctx->draw_vbo = crocus_draw_vbo;
9282bf215546Sopenharmony_ci   ctx->launch_grid = crocus_launch_grid;
9283bf215546Sopenharmony_ci
9284bf215546Sopenharmony_ci   ctx->set_frontend_noop = crocus_set_frontend_noop;
9285bf215546Sopenharmony_ci
9286bf215546Sopenharmony_ci#if GFX_VER >= 6
9287bf215546Sopenharmony_ci   ctx->create_stream_output_target = crocus_create_stream_output_target;
9288bf215546Sopenharmony_ci   ctx->stream_output_target_destroy = crocus_stream_output_target_destroy;
9289bf215546Sopenharmony_ci   ctx->set_stream_output_targets = crocus_set_stream_output_targets;
9290bf215546Sopenharmony_ci#endif
9291bf215546Sopenharmony_ci
9292bf215546Sopenharmony_ci   ice->state.dirty = ~0ull;
9293bf215546Sopenharmony_ci   ice->state.stage_dirty = ~0ull;
9294bf215546Sopenharmony_ci
9295bf215546Sopenharmony_ci   ice->state.statistics_counters_enabled = true;
9296bf215546Sopenharmony_ci
9297bf215546Sopenharmony_ci   ice->state.sample_mask = 0xff;
9298bf215546Sopenharmony_ci   ice->state.num_viewports = 1;
9299bf215546Sopenharmony_ci   ice->state.prim_mode = PIPE_PRIM_MAX;
9300bf215546Sopenharmony_ci   ice->state.reduced_prim_mode = PIPE_PRIM_MAX;
9301bf215546Sopenharmony_ci   ice->state.genx = calloc(1, sizeof(struct crocus_genx_state));
9302bf215546Sopenharmony_ci   ice->draw.derived_params.drawid = -1;
9303bf215546Sopenharmony_ci
9304bf215546Sopenharmony_ci   /* Default all scissor rectangles to be empty regions. */
9305bf215546Sopenharmony_ci   for (int i = 0; i < CROCUS_MAX_VIEWPORTS; i++) {
9306bf215546Sopenharmony_ci      ice->state.scissors[i] = (struct pipe_scissor_state) {
9307bf215546Sopenharmony_ci         .minx = 1, .maxx = 0, .miny = 1, .maxy = 0,
9308bf215546Sopenharmony_ci      };
9309bf215546Sopenharmony_ci   }
9310bf215546Sopenharmony_ci}
9311