/*
 * Copyright (c) 2014-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */
26
27#include "etnaviv_emit.h"
28
29#include "etnaviv_blend.h"
30#include "etnaviv_compiler.h"
31#include "etnaviv_context.h"
32#include "etnaviv_rasterizer.h"
33#include "etnaviv_resource.h"
34#include "etnaviv_rs.h"
35#include "etnaviv_screen.h"
36#include "etnaviv_shader.h"
37#include "etnaviv_texture.h"
38#include "etnaviv_translate.h"
39#include "etnaviv_uniforms.h"
40#include "etnaviv_util.h"
41#include "etnaviv_zsa.h"
42#include "hw/common.xml.h"
43#include "hw/state.xml.h"
44#include "hw/state_blt.xml.h"
45#include "util/u_math.h"
46
47/* Queue a STALL command (queues 2 words) */
48static inline void
49CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
50{
51   etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
52   etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
53}
54
55void
56etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
57{
58   bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
59   etna_cmd_stream_reserve(stream, blt ? 8 : 4);
60
61   if (blt) {
62      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
63      etna_cmd_stream_emit(stream, 1);
64   }
65
66   /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67   etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
68   etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));
69
70   if (from == SYNC_RECIPIENT_FE) {
71      /* if the frontend is to be stalled, queue a STALL frontend command */
72      CMD_STALL(stream, from, to);
73   } else {
74      /* otherwise, load the STALL token state */
75      etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
76      etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
77   }
78
79   if (blt) {
80      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
81      etna_cmd_stream_emit(stream, 0);
82   }
83}
84
/* Shorthands for emitting one register through the coalescing helpers.
 * Each macro prefixes the state name with VIVS_ and implicitly uses two
 * variables that must exist in the expanding scope: `stream` (the command
 * stream) and `coalesce` (a struct etna_coalesce). */

/* Emit a plain 32-bit state value. */
#define EMIT_STATE(state_name, src_value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)

/* Emit a state value through the fixed-point variant of the helper. */
#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

/* Emit a relocation; src_value is a pointer to the reloc descriptor. */
#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)

#define ETNA_3D_CONTEXT_SIZE  (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
95
96static unsigned
97required_stream_size(struct etna_context *ctx)
98{
99   unsigned size = ETNA_3D_CONTEXT_SIZE;
100
101   /* stall + flush */
102   size += 2 + 4;
103
104   /* vertex elements */
105   size += ctx->vertex_elements->num_elements + 1;
106
107   /* uniforms - worst case (2 words per uniform load) */
108   size += ctx->shader.vs->uniforms.count * 2;
109   size += ctx->shader.fs->uniforms.count * 2;
110
111   /* shader */
112   size += ctx->shader_state.vs_inst_mem_size + 1;
113   size += ctx->shader_state.ps_inst_mem_size + 1;
114
115   /* DRAW_INDEXED_PRIMITIVES command */
116   size += 6;
117
118   /* reserve for alignment etc. */
119   size += 64;
120
121   return size;
122}
123
124/* Emit state that only exists on HALTI5+ */
125static void
126emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
127{
128   struct etna_cmd_stream *stream = ctx->stream;
129   uint32_t dirty = ctx->dirty;
130   struct etna_coalesce coalesce;
131
132   etna_coalesce_start(stream, &coalesce);
133   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
134      /* Magic states (load balancing, inter-unit sync, buffers) */
135      /*007C4*/ EMIT_STATE(FE_HALTI5_ID_CONFIG, ctx->shader_state.FE_HALTI5_ID_CONFIG);
136      /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
137      /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
138      for (int x = 0; x < 4; ++x) {
139         /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
140      }
141   }
142   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
143      for (int x = 0; x < 4; ++x) {
144         /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
145      }
146   }
147   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
148      /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
149      /*00A94*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
150      /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
151      /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
152      /*01084*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
153      /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
154   }
155   etna_coalesce_end(stream, &coalesce);
156}
157
158/* Emit state that no longer exists on HALTI5 */
159static void
160emit_pre_halti5_state(struct etna_context *ctx)
161{
162   struct etna_cmd_stream *stream = ctx->stream;
163   uint32_t dirty = ctx->dirty;
164   struct etna_coalesce coalesce;
165
166   etna_coalesce_start(stream, &coalesce);
167   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
168      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
169   }
170   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
171      for (int x = 0; x < 4; ++x) {
172        /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
173      }
174   }
175   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
176      for (int x = 0; x < 4; ++x) {
177        /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
178      }
179   }
180   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
181      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
182   }
183   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
184      for (int x = 0; x < 10; ++x) {
185         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
186      }
187   }
188   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
189      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
190      for (int x = 0; x < 4; ++x) {
191         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
192      }
193      for (int x = 0; x < 16; ++x) {
194         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
195      }
196   }
197   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
198      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
199   }
200   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
201      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
202   }
203   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
204      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
205      for (int x = 0; x < 2; ++x) {
206         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
207      }
208      /*03834*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS2, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
209   }
210   etna_coalesce_end(stream, &coalesce);
211}
212
213/* Weave state before draw operation. This function merges all the compiled
214 * state blocks under the context into one device register state. Parts of
215 * this state that are changed since last call (dirty) will be uploaded as
216 * state changes in the command buffer. */
217void
218etna_emit_state(struct etna_context *ctx)
219{
220   struct etna_cmd_stream *stream = ctx->stream;
221   struct etna_screen *screen = ctx->screen;
222   unsigned ccw = ctx->rasterizer->front_ccw;
223
224
225   /* Pre-reserve the command buffer space which we are likely to need.
226    * This must cover all the state emitted below, and the following
227    * draw command. */
228   etna_cmd_stream_reserve(stream, required_stream_size(ctx));
229
230   uint32_t dirty = ctx->dirty;
231
232   /* Pre-processing: see what caches we need to flush before making state changes. */
233   uint32_t to_flush = 0;
234   if (unlikely(dirty & (ETNA_DIRTY_BLEND)))
235      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
236   if (unlikely(dirty & ETNA_DIRTY_ZSA))
237      to_flush |= VIVS_GL_FLUSH_CACHE_DEPTH;
238   if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
239      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
240   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
241      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
242   if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
243      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
244
245   if (to_flush) {
246      etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
247      etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
248   }
249
250   /* Flush TS cache before changing TS configuration. */
251   if (unlikely(dirty & ETNA_DIRTY_TS)) {
252      etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
253   }
254
255   /* Update vertex elements. This is different from any of the other states, in that
256    * a) the number of vertex elements written matters: so write only active ones
257    * b) the vertex element states must all be written: do not skip entries that stay the same */
258   if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
259      if (screen->specs.halti >= 5) {
260         /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
261            ctx->vertex_elements->num_elements,
262            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
263         /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
264            ctx->vertex_elements->num_elements,
265            ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
266         /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
267            ctx->vertex_elements->num_elements,
268            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
269      } else {
270         /* Special case: vertex elements must always be sent in full if changed */
271         /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
272            ctx->vertex_elements->num_elements,
273            ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
274         if (screen->specs.halti >= 2) {
275            /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
276               ctx->vertex_elements->num_elements,
277               ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
278         }
279      }
280   }
281   unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
282                           ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
283                           : ctx->shader_state.VS_OUTPUT_COUNT;
284
285   /* The following code is originally generated by gen_merge_state.py, to
286    * emit state in increasing order of address (this makes it possible to merge
287    * consecutive register updates into one SET_STATE command)
288    *
289    * There have been some manual changes, where the weaving operation is not
290    * simply bitwise or:
291    * - scissor fixp
292    * - num vertex elements
293    * - scissor handling
294    * - num samplers
295    * - texture lod
296    * - ETNA_DIRTY_TS
297    * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
298    * change anyway
299    * - PS / framebuffer interaction for MSAA
300    * - move update of GL_MULTI_SAMPLE_CONFIG first
301    * - add unlikely()/likely()
302    */
303   struct etna_coalesce coalesce;
304
305   etna_coalesce_start(stream, &coalesce);
306
307   /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
308    * directly
309    *    or indirectly */
310   /* multi sample config is set first, and outside of the normal sorting
311    * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
312    * possibly PS.TEMP_REGISTER_CONTROL).
313    */
314   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
315      uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
316      val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;
317
318      /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
319   }
320   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
321      /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
322      /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
323   }
324   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
325      /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
326   }
327   if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
328      if (screen->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
329         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
330            /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
331         }
332         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
333            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
334               /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
335            }
336         }
337      } else if(screen->specs.stream_count > 1) { /* hw w/ multiple vertex streams */
338         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
339            /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
340         }
341         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
342            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
343               /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
344            }
345         }
346      } else { /* hw w/ single vertex stream */
347         /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
348         /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
349      }
350   }
351   /* gallium has instance divisor as part of elements state */
352   if ((dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) && screen->specs.halti >= 2) {
353      for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) {
354         /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]);
355      }
356   }
357
358   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
359
360      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
361   }
362   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
363      /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
364      /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
365   }
366   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
367      /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
368   }
369   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
370      /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
371      /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
372      /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
373      /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
374      /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
375      /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
376   }
377   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
378      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
379
380      /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
381      /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
382      /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
383   }
384   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
385      /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
386   }
387   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
388      uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
389      /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
390   }
391   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
392      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
393      /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
394      /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
395   }
396   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
397      /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, ctx->clipping.minx << 16);
398      /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, ctx->clipping.miny << 16);
399      /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_SCISSOR_MARGIN_RIGHT);
400      /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_SCISSOR_MARGIN_BOTTOM);
401   }
402   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
403      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
404
405      /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
406      /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
407      /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
408   }
409   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
410      /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_CLIP_MARGIN_RIGHT);
411      /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_CLIP_MARGIN_BOTTOM);
412   }
413   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
414      /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
415   }
416   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
417      /*00E08*/ EMIT_STATE(RA_EARLY_DEPTH, etna_zsa_state(ctx->zsa)->RA_DEPTH_CONFIG);
418   }
419   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
420      /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
421      /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
422                           ctx->framebuffer.msaa_mode
423                              ? ctx->shader_state.PS_INPUT_COUNT_MSAA
424                              : ctx->shader_state.PS_INPUT_COUNT);
425      /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
426                           ctx->framebuffer.msaa_mode
427                              ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
428                              : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
429      /*01010*/ EMIT_STATE(PS_CONTROL, ctx->framebuffer.PS_CONTROL);
430      /*01030*/ EMIT_STATE(PS_CONTROL_EXT, ctx->framebuffer.PS_CONTROL_EXT);
431   }
432   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SHADER))) {
433      /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, (etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG |
434                                             ctx->framebuffer.PE_DEPTH_CONFIG));
435   }
436   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
437      /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
438      /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
439   }
440   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
441      /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);
442
443      if (screen->specs.halti < 0 || screen->model == 0x880) {
444         /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
445      }
446
447      /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
448   }
449
450   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
451      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP[ccw];
452      /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
453   }
454   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER))) {
455      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG[ccw];
456      /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG[ccw]);
457   }
458   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
459      uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
460      /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
461   }
462   if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
463      /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
464   }
465   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
466      uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
467      /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
468   }
469   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
470      uint32_t val;
471      /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
472       * as a mask to enable the bits from blend PE_COLOR_FORMAT */
473      val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
474              VIVS_PE_COLOR_FORMAT_OVERWRITE);
475      val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
476      val &= ctx->framebuffer.PE_COLOR_FORMAT;
477      /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
478   }
479   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
480      if (screen->specs.halti >= 0 && screen->model != 0x880) {
481         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
482         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
483         /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
484         /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
485         /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
486         /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
487      } else {
488         /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
489         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
490         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
491      }
492   }
493   if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_ZSA))) {
494      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT;
495      /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, val | ctx->stencil_ref.PE_STENCIL_CONFIG_EXT[ccw]);
496   }
497   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
498      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
499      /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
500   }
501   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
502      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
503      for (int x = 0; x < 2; ++x) {
504         /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
505      }
506   }
507   if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR)) &&
508       VIV_FEATURE(screen, chipMinorFeatures1, HALF_FLOAT)) {
509         /*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0, ctx->blend_color.PE_ALPHA_COLOR_EXT0);
510         /*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1, ctx->blend_color.PE_ALPHA_COLOR_EXT1);
511   }
512   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
513      /*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2, etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT2[ccw]);
514   }
515   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER)) && screen->specs.halti >= 3)
516      /*014BC*/ EMIT_STATE(PE_MEM_CONFIG, ctx->framebuffer.PE_MEM_CONFIG);
517   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
518      /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
519      /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
520      /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
521      /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
522      /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
523      /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
524      /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
525      /*016BC*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE_EXT, ctx->framebuffer.TS_COLOR_CLEAR_VALUE_EXT);
526   }
527   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
528      /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
529   }
530   etna_coalesce_end(stream, &coalesce);
531   /* end only EMIT_STATE */
532
533   /* Emit strongly architecture-specific state */
534   if (screen->specs.halti >= 5)
535      emit_halti5_only_state(ctx, vs_output_count);
536   else
537      emit_pre_halti5_state(ctx);
538
539   /* Beginning from Halti0 some of the new shader and sampler states are not
540    * self-synchronizing anymore. Thus we need to stall the FE on PE completion
541    * before loading the new states to avoid corrupting the state of the
542    * in-flight draw.
543    */
544   if (screen->specs.halti >= 0 &&
545       (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF |
546                      ETNA_DIRTY_SAMPLERS | ETNA_DIRTY_SAMPLER_VIEWS)))
547      etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
548
549   ctx->emit_texture_state(ctx);
550
551   /* We need to update the uniform cache only if one of the following bits are
552    * set in ctx->dirty:
553    * - ETNA_DIRTY_SHADER
554    * - ETNA_DIRTY_CONSTBUF
555    * - uniforms_dirty_bits
556    *
557    * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
558    * all
559    * other cases we can load on the changed uniforms.
560    */
561   static const uint32_t uniform_dirty_bits =
562      ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;
563
564   /**** Large dynamically-sized state ****/
565   bool do_uniform_flush = screen->specs.halti < 5;
566   if (dirty & (ETNA_DIRTY_SHADER)) {
567      /* Special case: a new shader was loaded; simply re-load all uniforms and
568       * shader code at once */
569      /* This sequence is special, do not change ordering unless necessary. According to comment
570         snippets in the Vivante kernel driver a process called "steering" goes on while programming
571         shader state. This (as I understand it) means certain unified states are "steered"
572         toward a specific shader unit (VS/PS/...) based on either explicit flags in register
573         00860, or what other state is written before "auto-steering". So this means some
574         state can legitimately be programmed multiple times.
575       */
576
577      if (screen->specs.halti >= 5) { /* ICACHE (HALTI5) */
578         assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
579         /* Set icache (VS) */
580         etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
581         etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
582         assert(ctx->shader_state.VS_INST_ADDR.bo);
583         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
584         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
585         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
586         etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
587
588         /* Set icache (PS) */
589         etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
590         etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
591         assert(ctx->shader_state.PS_INST_ADDR.bo);
592         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
593         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
594         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
595         etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
596
597      } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
598         /* ICACHE (pre-HALTI5) */
599         assert(screen->specs.has_icache && screen->specs.has_shader_range_registers);
600         /* Set icache (VS) */
601         etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
602         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
603               VIVS_VS_ICACHE_CONTROL_ENABLE |
604               VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
605         assert(ctx->shader_state.VS_INST_ADDR.bo);
606         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
607
608         /* Set icache (PS) */
609         etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
610         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
611               VIVS_VS_ICACHE_CONTROL_ENABLE |
612               VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
613         assert(ctx->shader_state.PS_INST_ADDR.bo);
614         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
615      } else {
616         /* Upload shader directly, first flushing and disabling icache if
617          * supported on this hw */
618         if (screen->specs.has_icache) {
619            etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
620                  VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
621                  VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
622         }
623         if (screen->specs.has_shader_range_registers) {
624            etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
625            etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
626                                        0x100);
627         }
628         etna_set_state_multi(stream, screen->specs.vs_offset,
629                              ctx->shader_state.vs_inst_mem_size,
630                              ctx->shader_state.VS_INST_MEM);
631         etna_set_state_multi(stream, screen->specs.ps_offset,
632                              ctx->shader_state.ps_inst_mem_size,
633                              ctx->shader_state.PS_INST_MEM);
634      }
635
636      if (screen->specs.has_unified_uniforms) {
637         etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
638         etna_set_state(stream, VIVS_PS_UNIFORM_BASE, screen->specs.max_vs_uniforms);
639      }
640
641      if (do_uniform_flush)
642         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
643
644      etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
645
646      if (do_uniform_flush)
647         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
648
649      etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
650
651      if (screen->specs.halti >= 5) {
652         /* HALTI5 needs to be prompted to pre-fetch shaders */
653         etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
654         etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
655         etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
656      }
657   } else {
658      /* ideally this cache would only be flushed if there are VS uniform changes */
659      if (do_uniform_flush)
660         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
661
662      if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
663         etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
664
665      /* ideally this cache would only be flushed if there are PS uniform changes */
666      if (do_uniform_flush)
667         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
668
669      if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
670         etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
671   }
672/**** End of state update ****/
673#undef EMIT_STATE
674#undef EMIT_STATE_FIXP
675#undef EMIT_STATE_RELOC
676   ctx->dirty = 0;
677   ctx->dirty_sampler_views = 0;
678}
679