/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24#include "util/format/u_format.h"
25#include "util/half_float.h"
26#include "v3d_context.h"
27#include "broadcom/common/v3d_macros.h"
28#include "broadcom/cle/v3dx_pack.h"
29#include "broadcom/common/v3d_util.h"
30#include "broadcom/compiler/v3d_compiler.h"
31
32static uint8_t
33v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
34{
35        /* We may get a bad blendfactor when blending is disabled. */
36        if (factor == 0)
37                return V3D_BLEND_FACTOR_ZERO;
38
39        switch (factor) {
40        case PIPE_BLENDFACTOR_ZERO:
41                return V3D_BLEND_FACTOR_ZERO;
42        case PIPE_BLENDFACTOR_ONE:
43                return V3D_BLEND_FACTOR_ONE;
44        case PIPE_BLENDFACTOR_SRC_COLOR:
45                return V3D_BLEND_FACTOR_SRC_COLOR;
46        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
47                return V3D_BLEND_FACTOR_INV_SRC_COLOR;
48        case PIPE_BLENDFACTOR_DST_COLOR:
49                return V3D_BLEND_FACTOR_DST_COLOR;
50        case PIPE_BLENDFACTOR_INV_DST_COLOR:
51                return V3D_BLEND_FACTOR_INV_DST_COLOR;
52        case PIPE_BLENDFACTOR_SRC_ALPHA:
53                return V3D_BLEND_FACTOR_SRC_ALPHA;
54        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
55                return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
56        case PIPE_BLENDFACTOR_DST_ALPHA:
57                return (dst_alpha_one ?
58                        V3D_BLEND_FACTOR_ONE :
59                        V3D_BLEND_FACTOR_DST_ALPHA);
60        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
61                return (dst_alpha_one ?
62                        V3D_BLEND_FACTOR_ZERO :
63                        V3D_BLEND_FACTOR_INV_DST_ALPHA);
64        case PIPE_BLENDFACTOR_CONST_COLOR:
65                return V3D_BLEND_FACTOR_CONST_COLOR;
66        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
67                return V3D_BLEND_FACTOR_INV_CONST_COLOR;
68        case PIPE_BLENDFACTOR_CONST_ALPHA:
69                return V3D_BLEND_FACTOR_CONST_ALPHA;
70        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
71                return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
72        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
73                return (dst_alpha_one ?
74                        V3D_BLEND_FACTOR_ZERO :
75                        V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
76        default:
77                unreachable("Bad blend factor");
78        }
79}
80
81#if V3D_VERSION < 40
82static inline uint16_t
83swizzled_border_color(const struct v3d_device_info *devinfo,
84                      struct pipe_sampler_state *sampler,
85                      struct v3d_sampler_view *sview,
86                      int chan)
87{
88        const struct util_format_description *desc =
89                util_format_description(sview->base.format);
90        uint8_t swiz = chan;
91
92        /* If we're doing swizzling in the sampler, then only rearrange the
93         * border color for the mismatch between the V3D texture format and
94         * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
95         * the sampler's swizzle.
96         *
97         * For swizzling in the shader, we don't do any pre-swizzling of the
98         * border color.
99         */
100        if (v3d_get_tex_return_size(devinfo, sview->base.format,
101                                    sampler->compare_mode) != 32)
102                swiz = desc->swizzle[swiz];
103
104        switch (swiz) {
105        case PIPE_SWIZZLE_0:
106                return _mesa_float_to_half(0.0);
107        case PIPE_SWIZZLE_1:
108                return _mesa_float_to_half(1.0);
109        default:
110                return _mesa_float_to_half(sampler->border_color.f[swiz]);
111        }
112}
113
114static void
115emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
116                 int i)
117{
118        struct v3d_job *job = v3d->job;
119        struct pipe_sampler_state *psampler = stage_tex->samplers[i];
120        struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
121        struct pipe_sampler_view *psview = stage_tex->textures[i];
122        struct v3d_sampler_view *sview = v3d_sampler_view(psview);
123        struct pipe_resource *prsc = psview->texture;
124        struct v3d_resource *rsc = v3d_resource(prsc);
125        const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
126
127        stage_tex->texture_state[i].offset =
128                v3d_cl_ensure_space(&job->indirect,
129                                    cl_packet_length(TEXTURE_SHADER_STATE),
130                                    32);
131        v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
132                             job->indirect.bo);
133
134        uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,
135                                                       psampler->compare_mode);
136
137        struct V3D33_TEXTURE_SHADER_STATE unpacked = {
138                /* XXX */
139                .border_color_red = swizzled_border_color(devinfo, psampler,
140                                                          sview, 0),
141                .border_color_green = swizzled_border_color(devinfo, psampler,
142                                                            sview, 1),
143                .border_color_blue = swizzled_border_color(devinfo, psampler,
144                                                           sview, 2),
145                .border_color_alpha = swizzled_border_color(devinfo, psampler,
146                                                            sview, 3),
147
148                /* In the normal texturing path, the LOD gets clamped between
149                 * min/max, and the base_level field (set in the sampler view
150                 * from first_level) only decides where the min/mag switch
151                 * happens, so we need to use the LOD clamps to keep us
152                 * between min and max.
153                 *
154                 * For txf, the LOD clamp is still used, despite GL not
155                 * wanting that.  We will need to have a separate
156                 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
157                 * support txf properly.
158                 */
159                .min_level_of_detail = MIN2(psview->u.tex.first_level +
160                                            MAX2(psampler->min_lod, 0),
161                                            psview->u.tex.last_level),
162                .max_level_of_detail = MIN2(psview->u.tex.first_level +
163                                            MAX2(psampler->max_lod,
164                                                 psampler->min_lod),
165                                            psview->u.tex.last_level),
166
167                .texture_base_pointer = cl_address(rsc->bo,
168                                                   rsc->slices[0].offset),
169
170                .output_32_bit = return_size == 32,
171        };
172
173        /* Set up the sampler swizzle if we're doing 16-bit sampling.  For
174         * 32-bit, we leave swizzling up to the shader compiler.
175         *
176         * Note: Contrary to the docs, the swizzle still applies even if the
177         * return size is 32.  It's just that you probably want to swizzle in
178         * the shader, because you need the Y/Z/W channels to be defined.
179         */
180        if (return_size == 32) {
181                unpacked.swizzle_r = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_X);
182                unpacked.swizzle_g = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Y);
183                unpacked.swizzle_b = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Z);
184                unpacked.swizzle_a = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_W);
185        } else {
186                unpacked.swizzle_r = v3d_translate_pipe_swizzle(sview->swizzle[0]);
187                unpacked.swizzle_g = v3d_translate_pipe_swizzle(sview->swizzle[1]);
188                unpacked.swizzle_b = v3d_translate_pipe_swizzle(sview->swizzle[2]);
189                unpacked.swizzle_a = v3d_translate_pipe_swizzle(sview->swizzle[3]);
190        }
191
192        int min_img_filter = psampler->min_img_filter;
193        int min_mip_filter = psampler->min_mip_filter;
194        int mag_img_filter = psampler->mag_img_filter;
195
196        if (return_size == 32) {
197                min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
198                min_img_filter = PIPE_TEX_FILTER_NEAREST;
199                mag_img_filter = PIPE_TEX_FILTER_NEAREST;
200        }
201
202        bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
203        switch (min_mip_filter) {
204        case PIPE_TEX_MIPFILTER_NONE:
205                unpacked.filter += min_nearest ? 2 : 0;
206                break;
207        case PIPE_TEX_MIPFILTER_NEAREST:
208                unpacked.filter += min_nearest ? 4 : 8;
209                break;
210        case PIPE_TEX_MIPFILTER_LINEAR:
211                unpacked.filter += min_nearest ? 4 : 8;
212                unpacked.filter += 2;
213                break;
214        }
215
216        if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
217                unpacked.filter++;
218
219        if (psampler->max_anisotropy > 8)
220                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
221        else if (psampler->max_anisotropy > 4)
222                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
223        else if (psampler->max_anisotropy > 2)
224                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
225        else if (psampler->max_anisotropy)
226                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
227
228        uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
229        cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
230
231        for (int i = 0; i < ARRAY_SIZE(packed); i++)
232                packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
233
234        /* TMU indirect structs need to be 32b aligned. */
235        v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
236        cl_emit_prepacked(&job->indirect, &packed);
237}
238
239static void
240emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
241{
242        for (int i = 0; i < stage_tex->num_textures; i++) {
243                if (stage_tex->textures[i])
244                        emit_one_texture(v3d, stage_tex, i);
245        }
246}
247#endif /* V3D_VERSION < 40 */
248
249static uint32_t
250translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
251{
252        if (v3d->swap_color_rb & (1 << rt)) {
253                colormask = ((colormask & (2 | 8)) |
254                             ((colormask & 1) << 2) |
255                             ((colormask & 4) >> 2));
256        }
257
258        return (~colormask) & 0xf;
259}
260
261static void
262emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
263              struct pipe_blend_state *blend, int rt, uint8_t rt_mask,
264              bool blend_dst_alpha_one)
265{
266        struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
267
268#if V3D_VERSION >= 40
269        /* We don't need to emit blend state for disabled RTs. */
270        if (!rtblend->blend_enable)
271                return;
272#endif
273
274        cl_emit(&job->bcl, BLEND_CFG, config) {
275#if V3D_VERSION >= 40
276                config.render_target_mask = rt_mask;
277#else
278                assert(rt == 0);
279#endif
280
281                config.color_blend_mode = rtblend->rgb_func;
282                config.color_blend_dst_factor =
283                        v3d_factor(rtblend->rgb_dst_factor,
284                                   blend_dst_alpha_one);
285                config.color_blend_src_factor =
286                        v3d_factor(rtblend->rgb_src_factor,
287                                   blend_dst_alpha_one);
288
289                config.alpha_blend_mode = rtblend->alpha_func;
290                config.alpha_blend_dst_factor =
291                        v3d_factor(rtblend->alpha_dst_factor,
292                                   blend_dst_alpha_one);
293                config.alpha_blend_src_factor =
294                        v3d_factor(rtblend->alpha_src_factor,
295                                   blend_dst_alpha_one);
296        }
297}
298
299static void
300emit_flat_shade_flags(struct v3d_job *job,
301                      int varying_offset,
302                      uint32_t varyings,
303                      enum V3DX(Varying_Flags_Action) lower,
304                      enum V3DX(Varying_Flags_Action) higher)
305{
306        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
307                flags.varying_offset_v0 = varying_offset;
308                flags.flat_shade_flags_for_varyings_v024 = varyings;
309                flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
310                        lower;
311                flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
312                        higher;
313        }
314}
315
316#if V3D_VERSION >= 40
317static void
318emit_noperspective_flags(struct v3d_job *job,
319                         int varying_offset,
320                         uint32_t varyings,
321                         enum V3DX(Varying_Flags_Action) lower,
322                         enum V3DX(Varying_Flags_Action) higher)
323{
324        cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
325                flags.varying_offset_v0 = varying_offset;
326                flags.non_perspective_flags_for_varyings_v024 = varyings;
327                flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
328                        lower;
329                flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
330                        higher;
331        }
332}
333
334static void
335emit_centroid_flags(struct v3d_job *job,
336                    int varying_offset,
337                    uint32_t varyings,
338                    enum V3DX(Varying_Flags_Action) lower,
339                    enum V3DX(Varying_Flags_Action) higher)
340{
341        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
342                flags.varying_offset_v0 = varying_offset;
343                flags.centroid_flags_for_varyings_v024 = varyings;
344                flags.action_for_centroid_flags_of_lower_numbered_varyings =
345                        lower;
346                flags.action_for_centroid_flags_of_higher_numbered_varyings =
347                        higher;
348        }
349}
350#endif /* V3D_VERSION >= 40 */
351
352static bool
353emit_varying_flags(struct v3d_job *job, uint32_t *flags,
354                   void (*flag_emit_callback)(struct v3d_job *job,
355                                              int varying_offset,
356                                              uint32_t flags,
357                                              enum V3DX(Varying_Flags_Action) lower,
358                                              enum V3DX(Varying_Flags_Action) higher))
359{
360        struct v3d_context *v3d = job->v3d;
361        bool emitted_any = false;
362
363        for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
364                if (!flags[i])
365                        continue;
366
367                if (emitted_any) {
368                        flag_emit_callback(job, i, flags[i],
369                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
370                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED);
371                } else if (i == 0) {
372                        flag_emit_callback(job, i, flags[i],
373                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
374                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
375                } else {
376                        flag_emit_callback(job, i, flags[i],
377                                           V3D_VARYING_FLAGS_ACTION_ZEROED,
378                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
379                }
380                emitted_any = true;
381        }
382
383        return emitted_any;
384}
385
386static inline struct v3d_uncompiled_shader *
387get_tf_shader(struct v3d_context *v3d)
388{
389        if (v3d->prog.bind_gs)
390                return v3d->prog.bind_gs;
391        else
392                return v3d->prog.bind_vs;
393}
394
395void
396v3dX(emit_state)(struct pipe_context *pctx)
397{
398        struct v3d_context *v3d = v3d_context(pctx);
399        struct v3d_job *job = v3d->job;
400        bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
401
402        if (v3d->dirty & (V3D_DIRTY_SCISSOR | V3D_DIRTY_VIEWPORT |
403                          V3D_DIRTY_RASTERIZER)) {
404                float *vpscale = v3d->viewport.scale;
405                float *vptranslate = v3d->viewport.translate;
406                float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
407                float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
408                float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
409                float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
410
411                /* Clip to the scissor if it's enabled, but still clip to the
412                 * drawable regardless since that controls where the binner
413                 * tries to put things.
414                 *
415                 * Additionally, always clip the rendering to the viewport,
416                 * since the hardware does guardband clipping, meaning
417                 * primitives would rasterize outside of the view volume.
418                 */
419                uint32_t minx, miny, maxx, maxy;
420                if (!v3d->rasterizer->base.scissor) {
421                        minx = MAX2(vp_minx, 0);
422                        miny = MAX2(vp_miny, 0);
423                        maxx = MIN2(vp_maxx, job->draw_width);
424                        maxy = MIN2(vp_maxy, job->draw_height);
425                } else {
426                        minx = MAX2(vp_minx, v3d->scissor.minx);
427                        miny = MAX2(vp_miny, v3d->scissor.miny);
428                        maxx = MIN2(vp_maxx, v3d->scissor.maxx);
429                        maxy = MIN2(vp_maxy, v3d->scissor.maxy);
430                }
431
432                cl_emit(&job->bcl, CLIP_WINDOW, clip) {
433                        clip.clip_window_left_pixel_coordinate = minx;
434                        clip.clip_window_bottom_pixel_coordinate = miny;
435                        if (maxx > minx && maxy > miny) {
436                                clip.clip_window_width_in_pixels = maxx - minx;
437                                clip.clip_window_height_in_pixels = maxy - miny;
438                        } else if (V3D_VERSION < 41) {
439                                /* The HW won't entirely clip out when scissor
440                                 * w/h is 0.  Just treat it the same as
441                                 * rasterizer discard.
442                                 */
443                                rasterizer_discard = true;
444                                clip.clip_window_width_in_pixels = 1;
445                                clip.clip_window_height_in_pixels = 1;
446                        }
447                }
448
449                job->draw_min_x = MIN2(job->draw_min_x, minx);
450                job->draw_min_y = MIN2(job->draw_min_y, miny);
451                job->draw_max_x = MAX2(job->draw_max_x, maxx);
452                job->draw_max_y = MAX2(job->draw_max_y, maxy);
453
454                if (!v3d->rasterizer->base.scissor) {
455                    job->scissor.disabled = true;
456                } else if (!job->scissor.disabled &&
457                           (v3d->dirty & V3D_DIRTY_SCISSOR)) {
458                        if (job->scissor.count < MAX_JOB_SCISSORS) {
459                                job->scissor.rects[job->scissor.count].min_x =
460                                        v3d->scissor.minx;
461                                job->scissor.rects[job->scissor.count].min_y =
462                                        v3d->scissor.miny;
463                                job->scissor.rects[job->scissor.count].max_x =
464                                        v3d->scissor.maxx - 1;
465                                job->scissor.rects[job->scissor.count].max_y =
466                                        v3d->scissor.maxy - 1;
467                                job->scissor.count++;
468                        } else {
469                                job->scissor.disabled = true;
470                                perf_debug("Too many scissor rects.");
471                        }
472                }
473        }
474
475        if (v3d->dirty & (V3D_DIRTY_RASTERIZER |
476                          V3D_DIRTY_ZSA |
477                          V3D_DIRTY_BLEND |
478                          V3D_DIRTY_COMPILED_FS)) {
479                cl_emit(&job->bcl, CFG_BITS, config) {
480                        config.enable_forward_facing_primitive =
481                                !rasterizer_discard &&
482                                !(v3d->rasterizer->base.cull_face &
483                                  PIPE_FACE_FRONT);
484                        config.enable_reverse_facing_primitive =
485                                !rasterizer_discard &&
486                                !(v3d->rasterizer->base.cull_face &
487                                  PIPE_FACE_BACK);
488                        /* This seems backwards, but it's what gets the
489                         * clipflat test to pass.
490                         */
491                        config.clockwise_primitives =
492                                v3d->rasterizer->base.front_ccw;
493
494                        config.enable_depth_offset =
495                                v3d->rasterizer->base.offset_tri;
496
497                        /* V3D follows GL behavior where the sample mask only
498                         * applies when MSAA is enabled.  Gallium has sample
499                         * mask apply anyway, and the MSAA blit shaders will
500                         * set sample mask without explicitly setting
501                         * rasterizer oversample.  Just force it on here,
502                         * since the blit shaders are the only way to have
503                         * !multisample && samplemask != 0xf.
504                         */
505                        config.rasterizer_oversample_mode =
506                                v3d->rasterizer->base.multisample ||
507                                v3d->sample_mask != 0xf;
508
509                        config.direct3d_provoking_vertex =
510                                v3d->rasterizer->base.flatshade_first;
511
512                        config.blend_enable = v3d->blend->blend_enables;
513
514                        /* Note: EZ state may update based on the compiled FS,
515                         * along with ZSA
516                         */
517                        config.early_z_updates_enable =
518                                (job->ez_state != V3D_EZ_DISABLED);
519                        if (v3d->zsa->base.depth_enabled) {
520                                config.z_updates_enable =
521                                        v3d->zsa->base.depth_writemask;
522                                config.early_z_enable =
523                                        config.early_z_updates_enable;
524                                config.depth_test_function =
525                                        v3d->zsa->base.depth_func;
526                        } else {
527                                config.depth_test_function = PIPE_FUNC_ALWAYS;
528                        }
529
530                        config.stencil_enable =
531                                v3d->zsa->base.stencil[0].enabled;
532
533                        /* Use nicer line caps when line smoothing is
534                         * enabled
535                         */
536                        config.line_rasterization =
537                                v3d_line_smoothing_enabled(v3d) ?
538                                V3D_LINE_RASTERIZATION_PERP_END_CAPS :
539                                V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
540                }
541
542        }
543
544        if (v3d->dirty & V3D_DIRTY_RASTERIZER &&
545            v3d->rasterizer->base.offset_tri) {
546                if (job->zsbuf &&
547                    job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
548                        cl_emit_prepacked_sized(&job->bcl,
549                                                v3d->rasterizer->depth_offset_z16,
550                                                cl_packet_length(DEPTH_OFFSET));
551                } else {
552                        cl_emit_prepacked_sized(&job->bcl,
553                                                v3d->rasterizer->depth_offset,
554                                                cl_packet_length(DEPTH_OFFSET));
555                }
556        }
557
558        if (v3d->dirty & V3D_DIRTY_RASTERIZER) {
559                cl_emit(&job->bcl, POINT_SIZE, point_size) {
560                        point_size.point_size = v3d->rasterizer->point_size;
561                }
562
563                cl_emit(&job->bcl, LINE_WIDTH, line_width) {
564                        line_width.line_width = v3d_get_real_line_width(v3d);
565                }
566        }
567
568        if (v3d->dirty & V3D_DIRTY_VIEWPORT) {
569                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
570                        clip.viewport_half_width_in_1_256th_of_pixel =
571                                v3d->viewport.scale[0] * 256.0f;
572                        clip.viewport_half_height_in_1_256th_of_pixel =
573                                v3d->viewport.scale[1] * 256.0f;
574                }
575
576                cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
577                        clip.viewport_z_offset_zc_to_zs =
578                                v3d->viewport.translate[2];
579                        clip.viewport_z_scale_zc_to_zs =
580                                v3d->viewport.scale[2];
581                }
582                cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
583                        float z1 = (v3d->viewport.translate[2] -
584                                    v3d->viewport.scale[2]);
585                        float z2 = (v3d->viewport.translate[2] +
586                                    v3d->viewport.scale[2]);
587                        clip.minimum_zw = MIN2(z1, z2);
588                        clip.maximum_zw = MAX2(z1, z2);
589                }
590
591                cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
592                        vp.viewport_centre_x_coordinate =
593                                v3d->viewport.translate[0];
594                        vp.viewport_centre_y_coordinate =
595                                v3d->viewport.translate[1];
596                }
597        }
598
599        if (v3d->dirty & V3D_DIRTY_BLEND) {
600                struct v3d_blend_state *blend = v3d->blend;
601
602                if (blend->blend_enables) {
603#if V3D_VERSION >= 40
604                        cl_emit(&job->bcl, BLEND_ENABLES, enables) {
605                                enables.mask = blend->blend_enables;
606                        }
607#endif
608
609                        if (blend->base.independent_blend_enable) {
610                                for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++)
611                                        emit_rt_blend(v3d, job, &blend->base, i,
612                                                      (1 << i),
613                                                      v3d->blend_dst_alpha_one & (1 << i));
614                        } else if (v3d->blend_dst_alpha_one &&
615                                   util_bitcount(v3d->blend_dst_alpha_one) < job->nr_cbufs) {
616                                /* Even if we don't have independent per-RT
617                                 * blending, we may have a combination of RT
618                                 * formats were some RTs have an alpha channel
619                                 * and others don't. Since this affects how
620                                 * blending is performed, we also need to emit
621                                 * independent blend configurations in this
622                                 * case: one for RTs with alpha and one for
623                                 * RTs without.
624                                 */
625                                emit_rt_blend(v3d, job, &blend->base, 0,
626                                              ((1 << V3D_MAX_DRAW_BUFFERS) - 1) &
627                                                   v3d->blend_dst_alpha_one,
628                                              true);
629                                emit_rt_blend(v3d, job, &blend->base, 0,
630                                              ((1 << V3D_MAX_DRAW_BUFFERS) - 1) &
631                                                   ~v3d->blend_dst_alpha_one,
632                                              false);
633                        } else {
634                                emit_rt_blend(v3d, job, &blend->base, 0,
635                                              (1 << V3D_MAX_DRAW_BUFFERS) - 1,
636                                              v3d->blend_dst_alpha_one);
637                        }
638                }
639        }
640
641        if (v3d->dirty & V3D_DIRTY_BLEND) {
642                struct pipe_blend_state *blend = &v3d->blend->base;
643
644                cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
645                        for (int i = 0; i < 4; i++) {
646                                int rt = blend->independent_blend_enable ? i : 0;
647                                int rt_mask = blend->rt[rt].colormask;
648
649                                mask.mask |= translate_colormask(v3d, rt_mask,
650                                                                 i) << (4 * i);
651                        }
652                }
653        }
654
655        /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
656         * color.
657         */
658        if (v3d->dirty & V3D_DIRTY_BLEND_COLOR ||
659            (V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) {
660                cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
661                        color.red_f16 = (v3d->swap_color_rb ?
662                                          v3d->blend_color.hf[2] :
663                                          v3d->blend_color.hf[0]);
664                        color.green_f16 = v3d->blend_color.hf[1];
665                        color.blue_f16 = (v3d->swap_color_rb ?
666                                           v3d->blend_color.hf[0] :
667                                           v3d->blend_color.hf[2]);
668                        color.alpha_f16 = v3d->blend_color.hf[3];
669                }
670        }
671
672        if (v3d->dirty & (V3D_DIRTY_ZSA | V3D_DIRTY_STENCIL_REF)) {
673                struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
674                struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
675
676                if (front->enabled) {
677                        cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
678                                               v3d->zsa->stencil_front, config) {
679                                config.stencil_ref_value =
680                                        v3d->stencil_ref.ref_value[0];
681                        }
682                }
683
684                if (back->enabled) {
685                        cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
686                                               v3d->zsa->stencil_back, config) {
687                                config.stencil_ref_value =
688                                        v3d->stencil_ref.ref_value[1];
689                        }
690                }
691        }
692
693#if V3D_VERSION < 40
694        /* Pre-4.x, we have texture state that depends on both the sampler and
695         * the view, so we merge them together at draw time.
696         */
697        if (v3d->dirty & V3D_DIRTY_FRAGTEX)
698                emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);
699
700        if (v3d->dirty & V3D_DIRTY_GEOMTEX)
701                emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);
702
703        if (v3d->dirty & V3D_DIRTY_VERTTEX)
704                emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);
705#endif
706
707        if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {
708                if (!emit_varying_flags(job,
709                                        v3d->prog.fs->prog_data.fs->flat_shade_flags,
710                                        emit_flat_shade_flags)) {
711                        cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
712                }
713        }
714
715#if V3D_VERSION >= 40
716        if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {
717                if (!emit_varying_flags(job,
718                                        v3d->prog.fs->prog_data.fs->noperspective_flags,
719                                        emit_noperspective_flags)) {
720                        cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
721                }
722        }
723
724        if (v3d->dirty & V3D_DIRTY_CENTROID_FLAGS) {
725                if (!emit_varying_flags(job,
726                                        v3d->prog.fs->prog_data.fs->centroid_flags,
727                                        emit_centroid_flags)) {
728                        cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
729                }
730        }
731#endif
732
733        /* Set up the transform feedback data specs (which VPM entries to
734         * output to which buffers).
735         */
736        if (v3d->dirty & (V3D_DIRTY_STREAMOUT |
737                          V3D_DIRTY_RASTERIZER |
738                          V3D_DIRTY_PRIM_MODE)) {
739                struct v3d_streamout_stateobj *so = &v3d->streamout;
740                if (so->num_targets) {
741                        bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&
742                                                v3d->rasterizer->base.point_size_per_vertex);
743                        struct v3d_uncompiled_shader *tf_shader =
744                                get_tf_shader(v3d);
745                        uint16_t *tf_specs = (psiz_per_vertex ?
746                                              tf_shader->tf_specs_psiz :
747                                              tf_shader->tf_specs);
748
749#if V3D_VERSION >= 40
750                        bool tf_enabled = v3d_transform_feedback_enabled(v3d);
751                        job->tf_enabled |= tf_enabled;
752
753                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
754                                tfe.number_of_16_bit_output_data_specs_following =
755                                        tf_shader->num_tf_specs;
756                                tfe.enable = tf_enabled;
757                        };
758#else /* V3D_VERSION < 40 */
759                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
760                                tfe.number_of_32_bit_output_buffer_address_following =
761                                        so->num_targets;
762                                tfe.number_of_16_bit_output_data_specs_following =
763                                        tf_shader->num_tf_specs;
764                        };
765#endif /* V3D_VERSION < 40 */
766                        for (int i = 0; i < tf_shader->num_tf_specs; i++) {
767                                cl_emit_prepacked(&job->bcl, &tf_specs[i]);
768                        }
769                } else {
770#if V3D_VERSION >= 40
771                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
772                                tfe.enable = false;
773                        };
774#endif /* V3D_VERSION >= 40 */
775                }
776        }
777
778        /* Set up the transform feedback buffers. */
779        if (v3d->dirty & V3D_DIRTY_STREAMOUT) {
780                struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
781                struct v3d_streamout_stateobj *so = &v3d->streamout;
782                for (int i = 0; i < so->num_targets; i++) {
783                        const struct pipe_stream_output_target *target =
784                                so->targets[i];
785                        struct v3d_resource *rsc = target ?
786                                v3d_resource(target->buffer) : NULL;
787                        struct pipe_shader_state *ss = &tf_shader->base;
788                        struct pipe_stream_output_info *info = &ss->stream_output;
789                        uint32_t offset = (v3d->streamout.offsets[i] *
790                                           info->stride[i] * 4);
791
792#if V3D_VERSION >= 40
793                        if (!target)
794                                continue;
795
796                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
797                                output.buffer_address =
798                                        cl_address(rsc->bo,
799                                                   target->buffer_offset +
800                                                   offset);
801                                output.buffer_size_in_32_bit_words =
802                                        (target->buffer_size - offset) >> 2;
803                                output.buffer_number = i;
804                        }
805#else /* V3D_VERSION < 40 */
806                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
807                                if (target) {
808                                        output.address =
809                                                cl_address(rsc->bo,
810                                                           target->buffer_offset +
811                                                           offset);
812                                }
813                        };
814#endif /* V3D_VERSION < 40 */
815                        if (target) {
816                                v3d_job_add_tf_write_resource(v3d->job,
817                                                              target->buffer);
818                        }
819                        /* XXX: buffer_size? */
820                }
821        }
822
823        if (v3d->dirty & V3D_DIRTY_OQ) {
824                cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
825                        if (v3d->active_queries && v3d->current_oq) {
826                                counter.address = cl_address(v3d->current_oq, 0);
827                        }
828                }
829        }
830
831#if V3D_VERSION >= 40
832        if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {
833                cl_emit(&job->bcl, SAMPLE_STATE, state) {
834                        /* Note: SampleCoverage was handled at the
835                         * frontend level by converting to sample_mask.
836                         */
837                        state.coverage = 1.0;
838                        state.mask = job->msaa ? v3d->sample_mask : 0xf;
839                }
840        }
841#endif
842}
843