1bf215546Sopenharmony_ci/**************************************************************************
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright 2009 VMware, Inc.
4bf215546Sopenharmony_ci * All Rights Reserved.
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the
8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to
11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
12bf215546Sopenharmony_ci * the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
16bf215546Sopenharmony_ci * of the Software.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci **************************************************************************/
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include <limits.h>
29bf215546Sopenharmony_ci#include "util/u_memory.h"
30bf215546Sopenharmony_ci#include "util/u_math.h"
31bf215546Sopenharmony_ci#include "util/u_rect.h"
32bf215546Sopenharmony_ci#include "util/u_surface.h"
33bf215546Sopenharmony_ci#include "util/u_pack_color.h"
34bf215546Sopenharmony_ci#include "util/u_string.h"
35bf215546Sopenharmony_ci#include "util/u_thread.h"
36bf215546Sopenharmony_ci#include "util/u_memset.h"
37bf215546Sopenharmony_ci#include "util/os_time.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci#include "lp_scene_queue.h"
40bf215546Sopenharmony_ci#include "lp_context.h"
41bf215546Sopenharmony_ci#include "lp_debug.h"
42bf215546Sopenharmony_ci#include "lp_fence.h"
43bf215546Sopenharmony_ci#include "lp_perf.h"
44bf215546Sopenharmony_ci#include "lp_query.h"
45bf215546Sopenharmony_ci#include "lp_rast.h"
46bf215546Sopenharmony_ci#include "lp_rast_priv.h"
47bf215546Sopenharmony_ci#include "gallivm/lp_bld_format.h"
48bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h"
49bf215546Sopenharmony_ci#include "lp_scene.h"
50bf215546Sopenharmony_ci#include "lp_screen.h"
51bf215546Sopenharmony_ci#include "lp_tex_sample.h"
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci
/*
 * Debug-build-only globals recording a source line, a rasterizer state and
 * a rasterizer task.
 * NOTE(review): presumably written by BEGIN_JIT_CALL()/END_JIT_CALL() so a
 * crash inside JIT-generated code can be traced back to its caller --
 * confirm against the macro definitions (not visible in this file).
 */
#ifdef DEBUG
int jit_line = 0;
const struct lp_rast_state *jit_state = NULL;
const struct lp_rasterizer_task *jit_task = NULL;
#endif
59bf215546Sopenharmony_ci
/*
 * Table of four (x, y) pairs, each component lying in [0, 1).
 * NOTE(review): presumably the sub-pixel sample locations used for 4x
 * multisampling -- confirm against the users of this table.
 */
const float lp_sample_pos_4x[4][2] = {
   { 0.375, 0.125 },
   { 0.875, 0.375 },
   { 0.125, 0.625 },
   { 0.625, 0.875 },
};
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci/**
66bf215546Sopenharmony_ci * Begin rasterizing a scene.
67bf215546Sopenharmony_ci * Called once per scene by one thread.
68bf215546Sopenharmony_ci */
69bf215546Sopenharmony_cistatic void
70bf215546Sopenharmony_cilp_rast_begin(struct lp_rasterizer *rast,
71bf215546Sopenharmony_ci              struct lp_scene *scene)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   rast->curr_scene = scene;
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   lp_scene_begin_rasterization(scene);
78bf215546Sopenharmony_ci   lp_scene_bin_iter_begin(scene);
79bf215546Sopenharmony_ci}
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_cistatic void
83bf215546Sopenharmony_cilp_rast_end(struct lp_rasterizer *rast)
84bf215546Sopenharmony_ci{
85bf215546Sopenharmony_ci   rast->curr_scene = NULL;
86bf215546Sopenharmony_ci}
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci/**
90bf215546Sopenharmony_ci * Beginning rasterization of a tile.
91bf215546Sopenharmony_ci * \param x  window X position of the tile, in pixels
92bf215546Sopenharmony_ci * \param y  window Y position of the tile, in pixels
93bf215546Sopenharmony_ci */
94bf215546Sopenharmony_cistatic void
95bf215546Sopenharmony_cilp_rast_tile_begin(struct lp_rasterizer_task *task,
96bf215546Sopenharmony_ci                   const struct cmd_bin *bin,
97bf215546Sopenharmony_ci                   int x, int y)
98bf215546Sopenharmony_ci{
99bf215546Sopenharmony_ci   struct lp_scene *scene = task->scene;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci   LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   task->bin = bin;
104bf215546Sopenharmony_ci   task->x = x * TILE_SIZE;
105bf215546Sopenharmony_ci   task->y = y * TILE_SIZE;
106bf215546Sopenharmony_ci   task->width = TILE_SIZE + x * TILE_SIZE > scene->fb.width ?
107bf215546Sopenharmony_ci                    scene->fb.width - x * TILE_SIZE : TILE_SIZE;
108bf215546Sopenharmony_ci   task->height = TILE_SIZE + y * TILE_SIZE > scene->fb.height ?
109bf215546Sopenharmony_ci                    scene->fb.height - y * TILE_SIZE : TILE_SIZE;
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   task->thread_data.vis_counter = 0;
112bf215546Sopenharmony_ci   task->thread_data.ps_invocations = 0;
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci   for (unsigned i = 0; i < scene->fb.nr_cbufs; i++) {
115bf215546Sopenharmony_ci      if (scene->fb.cbufs[i]) {
116bf215546Sopenharmony_ci         task->color_tiles[i] = scene->cbufs[i].map +
117bf215546Sopenharmony_ci                                scene->cbufs[i].stride * task->y +
118bf215546Sopenharmony_ci                                scene->cbufs[i].format_bytes * task->x;
119bf215546Sopenharmony_ci      }
120bf215546Sopenharmony_ci   }
121bf215546Sopenharmony_ci   if (scene->fb.zsbuf) {
122bf215546Sopenharmony_ci      task->depth_tile = scene->zsbuf.map +
123bf215546Sopenharmony_ci                         scene->zsbuf.stride * task->y +
124bf215546Sopenharmony_ci                         scene->zsbuf.format_bytes * task->x;
125bf215546Sopenharmony_ci   }
126bf215546Sopenharmony_ci}
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci/**
130bf215546Sopenharmony_ci * Clear the rasterizer's current color tile.
131bf215546Sopenharmony_ci * This is a bin command called during bin processing.
132bf215546Sopenharmony_ci * Clear commands always clear all bound layers.
133bf215546Sopenharmony_ci */
134bf215546Sopenharmony_cistatic void
135bf215546Sopenharmony_cilp_rast_clear_color(struct lp_rasterizer_task *task,
136bf215546Sopenharmony_ci                    const union lp_rast_cmd_arg arg)
137bf215546Sopenharmony_ci{
138bf215546Sopenharmony_ci   const struct lp_scene *scene = task->scene;
139bf215546Sopenharmony_ci   const unsigned cbuf = arg.clear_rb->cbuf;
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   /* we never bin clear commands for non-existing buffers */
142bf215546Sopenharmony_ci   assert(cbuf < scene->fb.nr_cbufs);
143bf215546Sopenharmony_ci   assert(scene->fb.cbufs[cbuf]);
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   const enum pipe_format format = scene->fb.cbufs[cbuf]->format;
146bf215546Sopenharmony_ci   union util_color uc = arg.clear_rb->color_val;
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   /*
149bf215546Sopenharmony_ci    * this is pretty rough since we have target format (bunch of bytes...)
150bf215546Sopenharmony_ci    * here. dump it as raw 4 dwords.
151bf215546Sopenharmony_ci    */
152bf215546Sopenharmony_ci   LP_DBG(DEBUG_RAST,
153bf215546Sopenharmony_ci          "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
154bf215546Sopenharmony_ci          __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]);
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   for (unsigned s = 0; s < scene->cbufs[cbuf].nr_samples; s++) {
157bf215546Sopenharmony_ci      void *map = (char *) scene->cbufs[cbuf].map
158bf215546Sopenharmony_ci         + scene->cbufs[cbuf].sample_stride * s;
159bf215546Sopenharmony_ci      util_fill_box(map,
160bf215546Sopenharmony_ci                    format,
161bf215546Sopenharmony_ci                    scene->cbufs[cbuf].stride,
162bf215546Sopenharmony_ci                    scene->cbufs[cbuf].layer_stride,
163bf215546Sopenharmony_ci                    task->x,
164bf215546Sopenharmony_ci                    task->y,
165bf215546Sopenharmony_ci                    0,
166bf215546Sopenharmony_ci                    task->width,
167bf215546Sopenharmony_ci                    task->height,
168bf215546Sopenharmony_ci                    scene->fb_max_layer + 1,
169bf215546Sopenharmony_ci                    &uc);
170bf215546Sopenharmony_ci   }
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   /* this will increase for each rb which probably doesn't mean much */
173bf215546Sopenharmony_ci   LP_COUNT(nr_color_tile_clear);
174bf215546Sopenharmony_ci}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci/**
178bf215546Sopenharmony_ci * Clear the rasterizer's current z/stencil tile.
179bf215546Sopenharmony_ci * This is a bin command called during bin processing.
180bf215546Sopenharmony_ci * Clear commands always clear all bound layers.
181bf215546Sopenharmony_ci */
182bf215546Sopenharmony_cistatic void
183bf215546Sopenharmony_cilp_rast_clear_zstencil(struct lp_rasterizer_task *task,
184bf215546Sopenharmony_ci                       const union lp_rast_cmd_arg arg)
185bf215546Sopenharmony_ci{
186bf215546Sopenharmony_ci   const struct lp_scene *scene = task->scene;
187bf215546Sopenharmony_ci   uint64_t clear_value64 = arg.clear_zstencil.value;
188bf215546Sopenharmony_ci   uint64_t clear_mask64 = arg.clear_zstencil.mask;
189bf215546Sopenharmony_ci   uint32_t clear_value = (uint32_t) clear_value64;
190bf215546Sopenharmony_ci   uint32_t clear_mask = (uint32_t) clear_mask64;
191bf215546Sopenharmony_ci   const unsigned height = task->height;
192bf215546Sopenharmony_ci   const unsigned width = task->width;
193bf215546Sopenharmony_ci   const unsigned dst_stride = scene->zsbuf.stride;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
196bf215546Sopenharmony_ci           __FUNCTION__, clear_value, clear_mask);
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   /*
199bf215546Sopenharmony_ci    * Clear the area of the depth/depth buffer matching this tile.
200bf215546Sopenharmony_ci    */
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   if (scene->fb.zsbuf) {
203bf215546Sopenharmony_ci      for (unsigned s = 0; s < scene->zsbuf.nr_samples; s++) {
204bf215546Sopenharmony_ci         uint8_t *dst_layer =
205bf215546Sopenharmony_ci            task->depth_tile + (s * scene->zsbuf.sample_stride);
206bf215546Sopenharmony_ci         const unsigned block_size =
207bf215546Sopenharmony_ci            util_format_get_blocksize(scene->fb.zsbuf->format);
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci         clear_value &= clear_mask;
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci         for (unsigned layer = 0; layer <= scene->fb_max_layer; layer++) {
212bf215546Sopenharmony_ci            uint8_t *dst = dst_layer;
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci            switch (block_size) {
215bf215546Sopenharmony_ci            case 1:
216bf215546Sopenharmony_ci               assert(clear_mask == 0xff);
217bf215546Sopenharmony_ci               for (unsigned i = 0; i < height; i++) {
218bf215546Sopenharmony_ci                  uint8_t *row = (uint8_t *)dst;
219bf215546Sopenharmony_ci                  memset(row, (uint8_t) clear_value, width);
220bf215546Sopenharmony_ci                  dst += dst_stride;
221bf215546Sopenharmony_ci               }
222bf215546Sopenharmony_ci               break;
223bf215546Sopenharmony_ci            case 2:
224bf215546Sopenharmony_ci               if (clear_mask == 0xffff) {
225bf215546Sopenharmony_ci                  for (unsigned i = 0; i < height; i++) {
226bf215546Sopenharmony_ci                     uint16_t *row = (uint16_t *)dst;
227bf215546Sopenharmony_ci                     for (unsigned j = 0; j < width; j++)
228bf215546Sopenharmony_ci                        *row++ = (uint16_t) clear_value;
229bf215546Sopenharmony_ci                     dst += dst_stride;
230bf215546Sopenharmony_ci                  }
231bf215546Sopenharmony_ci               }
232bf215546Sopenharmony_ci               else {
233bf215546Sopenharmony_ci                  for (unsigned i = 0; i < height; i++) {
234bf215546Sopenharmony_ci                     uint16_t *row = (uint16_t *)dst;
235bf215546Sopenharmony_ci                     for (unsigned j = 0; j < width; j++) {
236bf215546Sopenharmony_ci                        uint16_t tmp = ~clear_mask & *row;
237bf215546Sopenharmony_ci                        *row++ = clear_value | tmp;
238bf215546Sopenharmony_ci                     }
239bf215546Sopenharmony_ci                     dst += dst_stride;
240bf215546Sopenharmony_ci                  }
241bf215546Sopenharmony_ci               }
242bf215546Sopenharmony_ci               break;
243bf215546Sopenharmony_ci            case 4:
244bf215546Sopenharmony_ci               if (clear_mask == 0xffffffff) {
245bf215546Sopenharmony_ci                  for (unsigned i = 0; i < height; i++) {
246bf215546Sopenharmony_ci                     util_memset32(dst, clear_value, width);
247bf215546Sopenharmony_ci                     dst += dst_stride;
248bf215546Sopenharmony_ci                  }
249bf215546Sopenharmony_ci               }
250bf215546Sopenharmony_ci               else {
251bf215546Sopenharmony_ci                  for (unsigned i = 0; i < height; i++) {
252bf215546Sopenharmony_ci                     uint32_t *row = (uint32_t *)dst;
253bf215546Sopenharmony_ci                     for (unsigned j = 0; j < width; j++) {
254bf215546Sopenharmony_ci                        uint32_t tmp = ~clear_mask & *row;
255bf215546Sopenharmony_ci                        *row++ = clear_value | tmp;
256bf215546Sopenharmony_ci                     }
257bf215546Sopenharmony_ci                     dst += dst_stride;
258bf215546Sopenharmony_ci                  }
259bf215546Sopenharmony_ci               }
260bf215546Sopenharmony_ci               break;
261bf215546Sopenharmony_ci            case 8:
262bf215546Sopenharmony_ci               clear_value64 &= clear_mask64;
263bf215546Sopenharmony_ci               if (clear_mask64 == 0xffffffffffULL) {
264bf215546Sopenharmony_ci                  for (unsigned i = 0; i < height; i++) {
265bf215546Sopenharmony_ci                     util_memset64(dst, clear_value64, width);
266bf215546Sopenharmony_ci                     dst += dst_stride;
267bf215546Sopenharmony_ci                  }
268bf215546Sopenharmony_ci               }
269bf215546Sopenharmony_ci               else {
270bf215546Sopenharmony_ci                  for (unsigned i = 0; i < height; i++) {
271bf215546Sopenharmony_ci                     uint64_t *row = (uint64_t *)dst;
272bf215546Sopenharmony_ci                     for (unsigned j = 0; j < width; j++) {
273bf215546Sopenharmony_ci                        uint64_t tmp = ~clear_mask64 & *row;
274bf215546Sopenharmony_ci                        *row++ = clear_value64 | tmp;
275bf215546Sopenharmony_ci                     }
276bf215546Sopenharmony_ci                     dst += dst_stride;
277bf215546Sopenharmony_ci                  }
278bf215546Sopenharmony_ci               }
279bf215546Sopenharmony_ci               break;
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci            default:
282bf215546Sopenharmony_ci               assert(0);
283bf215546Sopenharmony_ci               break;
284bf215546Sopenharmony_ci            }
285bf215546Sopenharmony_ci            dst_layer += scene->zsbuf.layer_stride;
286bf215546Sopenharmony_ci         }
287bf215546Sopenharmony_ci      }
288bf215546Sopenharmony_ci   }
289bf215546Sopenharmony_ci}
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci/**
293bf215546Sopenharmony_ci * Run the shader on all blocks in a tile.  This is used when a tile is
294bf215546Sopenharmony_ci * completely contained inside a triangle.
295bf215546Sopenharmony_ci * This is a bin command called during bin processing.
296bf215546Sopenharmony_ci */
297bf215546Sopenharmony_cistatic void
298bf215546Sopenharmony_cilp_rast_shade_tile(struct lp_rasterizer_task *task,
299bf215546Sopenharmony_ci                   const union lp_rast_cmd_arg arg)
300bf215546Sopenharmony_ci{
301bf215546Sopenharmony_ci   const struct lp_scene *scene = task->scene;
302bf215546Sopenharmony_ci   const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
303bf215546Sopenharmony_ci   const unsigned tile_x = task->x, tile_y = task->y;
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci   if (inputs->disable) {
306bf215546Sopenharmony_ci      /* This command was partially binned and has been disabled */
307bf215546Sopenharmony_ci      return;
308bf215546Sopenharmony_ci   }
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   const struct lp_rast_state *state = task->state;
313bf215546Sopenharmony_ci   assert(state);
314bf215546Sopenharmony_ci   if (!state) {
315bf215546Sopenharmony_ci      return;
316bf215546Sopenharmony_ci   }
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   const struct lp_fragment_shader_variant *variant = state->variant;
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci   /* render the whole 64x64 tile in 4x4 chunks */
321bf215546Sopenharmony_ci   for (unsigned y = 0; y < task->height; y += 4){
322bf215546Sopenharmony_ci      for (unsigned x = 0; x < task->width; x += 4) {
323bf215546Sopenharmony_ci         /* color buffer */
324bf215546Sopenharmony_ci         uint8_t *color[PIPE_MAX_COLOR_BUFS];
325bf215546Sopenharmony_ci         unsigned stride[PIPE_MAX_COLOR_BUFS];
326bf215546Sopenharmony_ci         unsigned sample_stride[PIPE_MAX_COLOR_BUFS];
327bf215546Sopenharmony_ci         for (unsigned i = 0; i < scene->fb.nr_cbufs; i++){
328bf215546Sopenharmony_ci            if (scene->fb.cbufs[i]) {
329bf215546Sopenharmony_ci               stride[i] = scene->cbufs[i].stride;
330bf215546Sopenharmony_ci               sample_stride[i] = scene->cbufs[i].sample_stride;
331bf215546Sopenharmony_ci               color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x,
332bf215546Sopenharmony_ci                                          tile_y + y,
333bf215546Sopenharmony_ci                                          inputs->layer + inputs->view_index);
334bf215546Sopenharmony_ci            }
335bf215546Sopenharmony_ci            else {
336bf215546Sopenharmony_ci               stride[i] = 0;
337bf215546Sopenharmony_ci               sample_stride[i] = 0;
338bf215546Sopenharmony_ci               color[i] = NULL;
339bf215546Sopenharmony_ci            }
340bf215546Sopenharmony_ci         }
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci         /* depth buffer */
343bf215546Sopenharmony_ci         uint8_t *depth = NULL;
344bf215546Sopenharmony_ci         unsigned depth_stride = 0;
345bf215546Sopenharmony_ci         unsigned depth_sample_stride = 0;
346bf215546Sopenharmony_ci         if (scene->zsbuf.map) {
347bf215546Sopenharmony_ci            depth = lp_rast_get_depth_block_pointer(task, tile_x + x,
348bf215546Sopenharmony_ci                                           tile_y + y,
349bf215546Sopenharmony_ci                                           inputs->layer + inputs->view_index);
350bf215546Sopenharmony_ci            depth_stride = scene->zsbuf.stride;
351bf215546Sopenharmony_ci            depth_sample_stride = scene->zsbuf.sample_stride;
352bf215546Sopenharmony_ci         }
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci         uint64_t mask = 0;
355bf215546Sopenharmony_ci         for (unsigned i = 0; i < scene->fb_max_samples; i++)
356bf215546Sopenharmony_ci            mask |= (uint64_t)(0xffff) << (16 * i);
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci         /* Propagate non-interpolated raster state. */
359bf215546Sopenharmony_ci         task->thread_data.raster_state.viewport_index = inputs->viewport_index;
360bf215546Sopenharmony_ci         task->thread_data.raster_state.view_index = inputs->view_index;
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci         /* run shader on 4x4 block */
363bf215546Sopenharmony_ci         BEGIN_JIT_CALL(state, task);
364bf215546Sopenharmony_ci         variant->jit_function[RAST_WHOLE](&state->jit_context,
365bf215546Sopenharmony_ci                                            tile_x + x, tile_y + y,
366bf215546Sopenharmony_ci                                            inputs->frontfacing,
367bf215546Sopenharmony_ci                                            GET_A0(inputs),
368bf215546Sopenharmony_ci                                            GET_DADX(inputs),
369bf215546Sopenharmony_ci                                            GET_DADY(inputs),
370bf215546Sopenharmony_ci                                            color,
371bf215546Sopenharmony_ci                                            depth,
372bf215546Sopenharmony_ci                                            mask,
373bf215546Sopenharmony_ci                                            &task->thread_data,
374bf215546Sopenharmony_ci                                            stride,
375bf215546Sopenharmony_ci                                            depth_stride,
376bf215546Sopenharmony_ci                                            sample_stride,
377bf215546Sopenharmony_ci                                            depth_sample_stride);
378bf215546Sopenharmony_ci         END_JIT_CALL();
379bf215546Sopenharmony_ci      }
380bf215546Sopenharmony_ci   }
381bf215546Sopenharmony_ci}
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci/**
385bf215546Sopenharmony_ci * Run the shader on all blocks in a tile.  This is used when a tile is
386bf215546Sopenharmony_ci * completely contained inside a triangle, and the shader is opaque.
387bf215546Sopenharmony_ci * This is a bin command called during bin processing.
388bf215546Sopenharmony_ci */
389bf215546Sopenharmony_cistatic void
390bf215546Sopenharmony_cilp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
391bf215546Sopenharmony_ci                          const union lp_rast_cmd_arg arg)
392bf215546Sopenharmony_ci{
393bf215546Sopenharmony_ci   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci   assert(task->state);
396bf215546Sopenharmony_ci   if (!task->state) {
397bf215546Sopenharmony_ci      return;
398bf215546Sopenharmony_ci   }
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci   lp_rast_shade_tile(task, arg);
401bf215546Sopenharmony_ci}
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci/**
405bf215546Sopenharmony_ci * Compute shading for a 4x4 block of pixels inside a triangle.
406bf215546Sopenharmony_ci * This is a bin command called during bin processing.
407bf215546Sopenharmony_ci * \param x  X position of quad in window coords
408bf215546Sopenharmony_ci * \param y  Y position of quad in window coords
409bf215546Sopenharmony_ci */
410bf215546Sopenharmony_civoid
411bf215546Sopenharmony_cilp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task,
412bf215546Sopenharmony_ci                                const struct lp_rast_shader_inputs *inputs,
413bf215546Sopenharmony_ci                                unsigned x, unsigned y,
414bf215546Sopenharmony_ci                                uint64_t mask)
415bf215546Sopenharmony_ci{
416bf215546Sopenharmony_ci   const struct lp_rast_state *state = task->state;
417bf215546Sopenharmony_ci   const struct lp_fragment_shader_variant *variant = state->variant;
418bf215546Sopenharmony_ci   const struct lp_scene *scene = task->scene;
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   assert(state);
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci   /* Sanity checks */
423bf215546Sopenharmony_ci   assert(x < scene->tiles_x * TILE_SIZE);
424bf215546Sopenharmony_ci   assert(y < scene->tiles_y * TILE_SIZE);
425bf215546Sopenharmony_ci   assert(x % TILE_VECTOR_WIDTH == 0);
426bf215546Sopenharmony_ci   assert(y % TILE_VECTOR_HEIGHT == 0);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci   assert((x % 4) == 0);
429bf215546Sopenharmony_ci   assert((y % 4) == 0);
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci   /* color buffer */
432bf215546Sopenharmony_ci   uint8_t *color[PIPE_MAX_COLOR_BUFS];
433bf215546Sopenharmony_ci   unsigned stride[PIPE_MAX_COLOR_BUFS];
434bf215546Sopenharmony_ci   unsigned sample_stride[PIPE_MAX_COLOR_BUFS];
435bf215546Sopenharmony_ci   for (unsigned i = 0; i < scene->fb.nr_cbufs; i++) {
436bf215546Sopenharmony_ci      if (scene->fb.cbufs[i]) {
437bf215546Sopenharmony_ci         stride[i] = scene->cbufs[i].stride;
438bf215546Sopenharmony_ci         sample_stride[i] = scene->cbufs[i].sample_stride;
439bf215546Sopenharmony_ci         color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
440bf215546Sopenharmony_ci                                                    inputs->layer + inputs->view_index);
441bf215546Sopenharmony_ci      }
442bf215546Sopenharmony_ci      else {
443bf215546Sopenharmony_ci         stride[i] = 0;
444bf215546Sopenharmony_ci         sample_stride[i] = 0;
445bf215546Sopenharmony_ci         color[i] = NULL;
446bf215546Sopenharmony_ci      }
447bf215546Sopenharmony_ci   }
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci   /* depth buffer */
450bf215546Sopenharmony_ci   uint8_t *depth = NULL;
451bf215546Sopenharmony_ci   unsigned depth_stride = 0;
452bf215546Sopenharmony_ci   unsigned depth_sample_stride = 0;
453bf215546Sopenharmony_ci   if (scene->zsbuf.map) {
454bf215546Sopenharmony_ci      depth_stride = scene->zsbuf.stride;
455bf215546Sopenharmony_ci      depth_sample_stride = scene->zsbuf.sample_stride;
456bf215546Sopenharmony_ci      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index);
457bf215546Sopenharmony_ci   }
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci   assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   /*
462bf215546Sopenharmony_ci    * The rasterizer may produce fragments outside our
463bf215546Sopenharmony_ci    * allocated 4x4 blocks hence need to filter them out here.
464bf215546Sopenharmony_ci    */
465bf215546Sopenharmony_ci   if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
466bf215546Sopenharmony_ci      /* Propagate non-interpolated raster state. */
467bf215546Sopenharmony_ci      task->thread_data.raster_state.viewport_index = inputs->viewport_index;
468bf215546Sopenharmony_ci      task->thread_data.raster_state.view_index = inputs->view_index;
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_ci      /* run shader on 4x4 block */
471bf215546Sopenharmony_ci      BEGIN_JIT_CALL(state, task);
472bf215546Sopenharmony_ci      variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
473bf215546Sopenharmony_ci                                            x, y,
474bf215546Sopenharmony_ci                                            inputs->frontfacing,
475bf215546Sopenharmony_ci                                            GET_A0(inputs),
476bf215546Sopenharmony_ci                                            GET_DADX(inputs),
477bf215546Sopenharmony_ci                                            GET_DADY(inputs),
478bf215546Sopenharmony_ci                                            color,
479bf215546Sopenharmony_ci                                            depth,
480bf215546Sopenharmony_ci                                            mask,
481bf215546Sopenharmony_ci                                            &task->thread_data,
482bf215546Sopenharmony_ci                                            stride,
483bf215546Sopenharmony_ci                                            depth_stride,
484bf215546Sopenharmony_ci                                            sample_stride,
485bf215546Sopenharmony_ci                                            depth_sample_stride);
486bf215546Sopenharmony_ci      END_JIT_CALL();
487bf215546Sopenharmony_ci   }
488bf215546Sopenharmony_ci}
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_civoid
492bf215546Sopenharmony_cilp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
493bf215546Sopenharmony_ci                         const struct lp_rast_shader_inputs *inputs,
494bf215546Sopenharmony_ci                         unsigned x, unsigned y,
495bf215546Sopenharmony_ci                         unsigned mask)
496bf215546Sopenharmony_ci{
497bf215546Sopenharmony_ci   uint64_t new_mask = 0;
498bf215546Sopenharmony_ci   for (unsigned i = 0; i < task->scene->fb_max_samples; i++)
499bf215546Sopenharmony_ci      new_mask |= ((uint64_t)mask) << (16 * i);
500bf215546Sopenharmony_ci   lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask);
501bf215546Sopenharmony_ci}
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci/**
505bf215546Sopenharmony_ci * Directly copy pixels from a texture to the destination color buffer.
506bf215546Sopenharmony_ci * This is a bin command called during bin processing.
507bf215546Sopenharmony_ci */
508bf215546Sopenharmony_cistatic void
509bf215546Sopenharmony_cilp_rast_blit_tile_to_dest(struct lp_rasterizer_task *task,
510bf215546Sopenharmony_ci                          const union lp_rast_cmd_arg arg)
511bf215546Sopenharmony_ci{
512bf215546Sopenharmony_ci   const struct lp_scene *scene = task->scene;
513bf215546Sopenharmony_ci   const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
514bf215546Sopenharmony_ci   const struct lp_rast_state *state = task->state;
515bf215546Sopenharmony_ci   struct lp_fragment_shader_variant *variant = state->variant;
516bf215546Sopenharmony_ci   const struct lp_jit_texture *texture = &state->jit_context.textures[0];
517bf215546Sopenharmony_ci   struct pipe_surface *cbuf = scene->fb.cbufs[0];
518bf215546Sopenharmony_ci   const unsigned face_slice = cbuf->u.tex.first_layer;
519bf215546Sopenharmony_ci   const unsigned level = cbuf->u.tex.level;
520bf215546Sopenharmony_ci   struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
523bf215546Sopenharmony_ci
524bf215546Sopenharmony_ci   if (inputs->disable) {
525bf215546Sopenharmony_ci      /* This command was partially binned and has been disabled */
526bf215546Sopenharmony_ci      return;
527bf215546Sopenharmony_ci   }
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci   uint8_t *dst = llvmpipe_get_texture_image_address(lpt, face_slice, level);
530bf215546Sopenharmony_ci   if (!dst)
531bf215546Sopenharmony_ci      return;
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci   const unsigned dst_stride = lpt->row_stride[level];
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci   const uint8_t *src = texture->base;
536bf215546Sopenharmony_ci   const unsigned src_stride = texture->row_stride[0];
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci   int src_x = util_iround(GET_A0(inputs)[1][0]*texture->width - 0.5f);
539bf215546Sopenharmony_ci   int src_y = util_iround(GET_A0(inputs)[1][1]*texture->height - 0.5f);
540bf215546Sopenharmony_ci
541bf215546Sopenharmony_ci   src_x += task->x;
542bf215546Sopenharmony_ci   src_y += task->y;
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci   if (0) {
545bf215546Sopenharmony_ci      union util_color uc;
546bf215546Sopenharmony_ci      uc.ui[0] = 0xff0000ff;
547bf215546Sopenharmony_ci      util_fill_rect(dst,
548bf215546Sopenharmony_ci                     cbuf->format,
549bf215546Sopenharmony_ci                     dst_stride,
550bf215546Sopenharmony_ci                     task->x,
551bf215546Sopenharmony_ci                     task->y,
552bf215546Sopenharmony_ci                     task->width,
553bf215546Sopenharmony_ci                     task->height,
554bf215546Sopenharmony_ci                     &uc);
555bf215546Sopenharmony_ci      return;
556bf215546Sopenharmony_ci   }
557bf215546Sopenharmony_ci
558bf215546Sopenharmony_ci   if (src_x >= 0 &&
559bf215546Sopenharmony_ci       src_y >= 0 &&
560bf215546Sopenharmony_ci       src_x + task->width <= texture->width &&
561bf215546Sopenharmony_ci       src_y + task->height <= texture->height) {
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci      if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA ||
564bf215546Sopenharmony_ci          (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
565bf215546Sopenharmony_ci           cbuf->format == PIPE_FORMAT_B8G8R8X8_UNORM)) {
566bf215546Sopenharmony_ci         util_copy_rect(dst,
567bf215546Sopenharmony_ci                        cbuf->format,
568bf215546Sopenharmony_ci                        dst_stride,
569bf215546Sopenharmony_ci                        task->x, task->y,
570bf215546Sopenharmony_ci                        task->width, task->height,
571bf215546Sopenharmony_ci                        src, src_stride,
572bf215546Sopenharmony_ci                        src_x, src_y);
573bf215546Sopenharmony_ci         return;
574bf215546Sopenharmony_ci      }
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_ci      if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1) {
577bf215546Sopenharmony_ci         if (cbuf->format == PIPE_FORMAT_B8G8R8A8_UNORM) {
578bf215546Sopenharmony_ci            dst += task->x * 4;
579bf215546Sopenharmony_ci            src += src_x * 4;
580bf215546Sopenharmony_ci            dst += task->y * dst_stride;
581bf215546Sopenharmony_ci            src += src_y * src_stride;
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci            for (int y = 0; y < task->height; ++y) {
584bf215546Sopenharmony_ci               const uint32_t *src_row = (const uint32_t *)src;
585bf215546Sopenharmony_ci               uint32_t *dst_row = (uint32_t *)dst;
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci               for (int x = 0; x < task->width; ++x) {
588bf215546Sopenharmony_ci                  *dst_row++ = *src_row++ | 0xff000000;
589bf215546Sopenharmony_ci               }
590bf215546Sopenharmony_ci               dst += dst_stride;
591bf215546Sopenharmony_ci               src += src_stride;
592bf215546Sopenharmony_ci            }
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci            return;
595bf215546Sopenharmony_ci         }
596bf215546Sopenharmony_ci      }
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   }
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   /*
601bf215546Sopenharmony_ci    * Fall back to the jit shaders.
602bf215546Sopenharmony_ci    */
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   lp_rast_shade_tile_opaque(task, arg);
605bf215546Sopenharmony_ci}
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_cistatic void
609bf215546Sopenharmony_cilp_rast_blit_tile(struct lp_rasterizer_task *task,
610bf215546Sopenharmony_ci                  const union lp_rast_cmd_arg arg)
611bf215546Sopenharmony_ci{
612bf215546Sopenharmony_ci   /* This kindof just works, but isn't efficient:
613bf215546Sopenharmony_ci    */
614bf215546Sopenharmony_ci   lp_rast_blit_tile_to_dest(task, arg);
615bf215546Sopenharmony_ci}
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci/**
619bf215546Sopenharmony_ci * Begin a new occlusion query.
620bf215546Sopenharmony_ci * This is a bin command put in all bins.
621bf215546Sopenharmony_ci * Called per thread.
622bf215546Sopenharmony_ci */
623bf215546Sopenharmony_cistatic void
624bf215546Sopenharmony_cilp_rast_begin_query(struct lp_rasterizer_task *task,
625bf215546Sopenharmony_ci                    const union lp_rast_cmd_arg arg)
626bf215546Sopenharmony_ci{
627bf215546Sopenharmony_ci   struct llvmpipe_query *pq = arg.query_obj;
628bf215546Sopenharmony_ci
629bf215546Sopenharmony_ci   switch (pq->type) {
630bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_COUNTER:
631bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE:
632bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
633bf215546Sopenharmony_ci      pq->start[task->thread_index] = task->thread_data.vis_counter;
634bf215546Sopenharmony_ci      break;
635bf215546Sopenharmony_ci   case PIPE_QUERY_PIPELINE_STATISTICS:
636bf215546Sopenharmony_ci      pq->start[task->thread_index] = task->thread_data.ps_invocations;
637bf215546Sopenharmony_ci      break;
638bf215546Sopenharmony_ci   case PIPE_QUERY_TIME_ELAPSED:
639bf215546Sopenharmony_ci      pq->start[task->thread_index] = os_time_get_nano();
640bf215546Sopenharmony_ci      break;
641bf215546Sopenharmony_ci   default:
642bf215546Sopenharmony_ci      assert(0);
643bf215546Sopenharmony_ci      break;
644bf215546Sopenharmony_ci   }
645bf215546Sopenharmony_ci}
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci
648bf215546Sopenharmony_ci/**
649bf215546Sopenharmony_ci * End the current occlusion query.
650bf215546Sopenharmony_ci * This is a bin command put in all bins.
651bf215546Sopenharmony_ci * Called per thread.
652bf215546Sopenharmony_ci */
653bf215546Sopenharmony_cistatic void
654bf215546Sopenharmony_cilp_rast_end_query(struct lp_rasterizer_task *task,
655bf215546Sopenharmony_ci                  const union lp_rast_cmd_arg arg)
656bf215546Sopenharmony_ci{
657bf215546Sopenharmony_ci   struct llvmpipe_query *pq = arg.query_obj;
658bf215546Sopenharmony_ci
659bf215546Sopenharmony_ci   switch (pq->type) {
660bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_COUNTER:
661bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE:
662bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
663bf215546Sopenharmony_ci      pq->end[task->thread_index] +=
664bf215546Sopenharmony_ci         task->thread_data.vis_counter - pq->start[task->thread_index];
665bf215546Sopenharmony_ci      pq->start[task->thread_index] = 0;
666bf215546Sopenharmony_ci      break;
667bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP:
668bf215546Sopenharmony_ci   case PIPE_QUERY_TIME_ELAPSED:
669bf215546Sopenharmony_ci      pq->end[task->thread_index] = os_time_get_nano();
670bf215546Sopenharmony_ci      break;
671bf215546Sopenharmony_ci   case PIPE_QUERY_PIPELINE_STATISTICS:
672bf215546Sopenharmony_ci      pq->end[task->thread_index] +=
673bf215546Sopenharmony_ci         task->thread_data.ps_invocations - pq->start[task->thread_index];
674bf215546Sopenharmony_ci      pq->start[task->thread_index] = 0;
675bf215546Sopenharmony_ci      break;
676bf215546Sopenharmony_ci   default:
677bf215546Sopenharmony_ci      assert(0);
678bf215546Sopenharmony_ci      break;
679bf215546Sopenharmony_ci   }
680bf215546Sopenharmony_ci}
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_civoid
684bf215546Sopenharmony_cilp_rast_set_state(struct lp_rasterizer_task *task,
685bf215546Sopenharmony_ci                  const union lp_rast_cmd_arg arg)
686bf215546Sopenharmony_ci{
687bf215546Sopenharmony_ci   task->state = arg.set_state;
688bf215546Sopenharmony_ci}
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci
691bf215546Sopenharmony_ci/**
692bf215546Sopenharmony_ci * Called when we're done writing to a color tile.
693bf215546Sopenharmony_ci */
694bf215546Sopenharmony_cistatic void
695bf215546Sopenharmony_cilp_rast_tile_end(struct lp_rasterizer_task *task)
696bf215546Sopenharmony_ci{
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci   for (unsigned i = 0; i < task->scene->num_active_queries; ++i) {
699bf215546Sopenharmony_ci      lp_rast_end_query(task,
700bf215546Sopenharmony_ci                        lp_rast_arg_query(task->scene->active_queries[i]));
701bf215546Sopenharmony_ci   }
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   /* debug */
704bf215546Sopenharmony_ci   memset(task->color_tiles, 0, sizeof(task->color_tiles));
705bf215546Sopenharmony_ci   task->depth_tile = NULL;
706bf215546Sopenharmony_ci   task->bin = NULL;
707bf215546Sopenharmony_ci}
708bf215546Sopenharmony_ci
709bf215546Sopenharmony_ci
/* Per-command masks used to classify a bin by the most specialized
 * rendering path that can execute all of its commands.
 *
 * Each mask is (flag << 1) - 1; assuming every LP_RAST_FLAGS_* value is a
 * single bit, that is the flag itself plus all lower bits.  AND-ing the
 * masks of every command in a bin therefore leaves a flag set only if
 * each command's mask contains it.
 */
#define TRI   ((LP_RAST_FLAGS_TRI  << 1) - 1)  /* general case */
#define RECT  ((LP_RAST_FLAGS_RECT << 1) - 1)  /* direct rectangle rasterizer */
#define BLIT  ((LP_RAST_FLAGS_BLIT << 1) - 1)  /* write direct-to-dest */
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_cistatic const unsigned
718bf215546Sopenharmony_cirast_flags[] = {
719bf215546Sopenharmony_ci   BLIT,                        /* clear color */
720bf215546Sopenharmony_ci   TRI,                         /* clear zstencil */
721bf215546Sopenharmony_ci   TRI,                         /* triangle_1 */
722bf215546Sopenharmony_ci   TRI,                         /* triangle_2 */
723bf215546Sopenharmony_ci   TRI,                         /* triangle_3 */
724bf215546Sopenharmony_ci   TRI,                         /* triangle_4 */
725bf215546Sopenharmony_ci   TRI,                         /* triangle_5 */
726bf215546Sopenharmony_ci   TRI,                         /* triangle_6 */
727bf215546Sopenharmony_ci   TRI,                         /* triangle_7 */
728bf215546Sopenharmony_ci   TRI,                         /* triangle_8 */
729bf215546Sopenharmony_ci   TRI,                         /* triangle_3_4 */
730bf215546Sopenharmony_ci   TRI,                         /* triangle_3_16 */
731bf215546Sopenharmony_ci   TRI,                         /* triangle_4_16 */
732bf215546Sopenharmony_ci   RECT,                        /* shade_tile */
733bf215546Sopenharmony_ci   RECT,                        /* shade_tile_opaque */
734bf215546Sopenharmony_ci   TRI,                         /* begin_query */
735bf215546Sopenharmony_ci   TRI,                         /* end_query */
736bf215546Sopenharmony_ci   BLIT,                        /* set_state, */
737bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_1 */
738bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_2 */
739bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_3 */
740bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_4 */
741bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_5 */
742bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_6 */
743bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_7 */
744bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_8 */
745bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_3_4 */
746bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_3_16 */
747bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_32_4_16 */
748bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_1 */
749bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_2 */
750bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_3 */
751bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_4 */
752bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_5 */
753bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_6 */
754bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_7 */
755bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_8 */
756bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_3_4 */
757bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_3_16 */
758bf215546Sopenharmony_ci   TRI,                         /* lp_rast_triangle_ms_4_16 */
759bf215546Sopenharmony_ci   RECT,                        /* rectangle */
760bf215546Sopenharmony_ci   BLIT,                        /* blit */
761bf215546Sopenharmony_ci};
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci/*
764bf215546Sopenharmony_ci */
765bf215546Sopenharmony_cistatic const lp_rast_cmd_func
766bf215546Sopenharmony_cidispatch_blit[] = {
767bf215546Sopenharmony_ci   lp_rast_clear_color,
768bf215546Sopenharmony_ci   NULL,                        /* clear_zstencil */
769bf215546Sopenharmony_ci   NULL,                        /* triangle_1 */
770bf215546Sopenharmony_ci   NULL,                        /* triangle_2 */
771bf215546Sopenharmony_ci   NULL,                        /* triangle_3 */
772bf215546Sopenharmony_ci   NULL,                        /* triangle_4 */
773bf215546Sopenharmony_ci   NULL,                        /* triangle_5 */
774bf215546Sopenharmony_ci   NULL,                        /* triangle_6 */
775bf215546Sopenharmony_ci   NULL,                        /* triangle_7 */
776bf215546Sopenharmony_ci   NULL,                        /* triangle_8 */
777bf215546Sopenharmony_ci   NULL,                        /* triangle_3_4 */
778bf215546Sopenharmony_ci   NULL,                        /* triangle_3_16 */
779bf215546Sopenharmony_ci   NULL,                        /* triangle_4_16 */
780bf215546Sopenharmony_ci   NULL,                        /* shade_tile */
781bf215546Sopenharmony_ci   NULL,                        /* shade_tile_opaque */
782bf215546Sopenharmony_ci   NULL,                        /* begin_query */
783bf215546Sopenharmony_ci   NULL,                        /* end_query */
784bf215546Sopenharmony_ci   lp_rast_set_state,           /* set_state */
785bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_1 */
786bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_2 */
787bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_3 */
788bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_4 */
789bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_5 */
790bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_6 */
791bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_7 */
792bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_8 */
793bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_3_4 */
794bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_3_16 */
795bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_32_4_16 */
796bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_1 */
797bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_2 */
798bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_3 */
799bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_4 */
800bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_5 */
801bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_6 */
802bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_7 */
803bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_8 */
804bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_3_4 */
805bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_3_16 */
806bf215546Sopenharmony_ci   NULL,                        /* lp_rast_triangle_ms_4_16 */
807bf215546Sopenharmony_ci   NULL,                        /* rectangle */
808bf215546Sopenharmony_ci   lp_rast_blit_tile_to_dest,
809bf215546Sopenharmony_ci};
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci
812bf215546Sopenharmony_ci
813bf215546Sopenharmony_ci/* Triangle and general case rasterization: Use the SOA llvm shdaers,
814bf215546Sopenharmony_ci * an active swizzled tile for each color buf, etc.  Don't blit/clear
815bf215546Sopenharmony_ci * directly to destination surface as we know there are swizzled
816bf215546Sopenharmony_ci * operations coming.
817bf215546Sopenharmony_ci */
818bf215546Sopenharmony_cistatic const lp_rast_cmd_func
819bf215546Sopenharmony_cidispatch_tri[] = {
820bf215546Sopenharmony_ci   lp_rast_clear_color,
821bf215546Sopenharmony_ci   lp_rast_clear_zstencil,
822bf215546Sopenharmony_ci   lp_rast_triangle_1,
823bf215546Sopenharmony_ci   lp_rast_triangle_2,
824bf215546Sopenharmony_ci   lp_rast_triangle_3,
825bf215546Sopenharmony_ci   lp_rast_triangle_4,
826bf215546Sopenharmony_ci   lp_rast_triangle_5,
827bf215546Sopenharmony_ci   lp_rast_triangle_6,
828bf215546Sopenharmony_ci   lp_rast_triangle_7,
829bf215546Sopenharmony_ci   lp_rast_triangle_8,
830bf215546Sopenharmony_ci   lp_rast_triangle_3_4,
831bf215546Sopenharmony_ci   lp_rast_triangle_3_16,
832bf215546Sopenharmony_ci   lp_rast_triangle_4_16,
833bf215546Sopenharmony_ci   lp_rast_shade_tile,
834bf215546Sopenharmony_ci   lp_rast_shade_tile_opaque,
835bf215546Sopenharmony_ci   lp_rast_begin_query,
836bf215546Sopenharmony_ci   lp_rast_end_query,
837bf215546Sopenharmony_ci   lp_rast_set_state,
838bf215546Sopenharmony_ci   lp_rast_triangle_32_1,
839bf215546Sopenharmony_ci   lp_rast_triangle_32_2,
840bf215546Sopenharmony_ci   lp_rast_triangle_32_3,
841bf215546Sopenharmony_ci   lp_rast_triangle_32_4,
842bf215546Sopenharmony_ci   lp_rast_triangle_32_5,
843bf215546Sopenharmony_ci   lp_rast_triangle_32_6,
844bf215546Sopenharmony_ci   lp_rast_triangle_32_7,
845bf215546Sopenharmony_ci   lp_rast_triangle_32_8,
846bf215546Sopenharmony_ci   lp_rast_triangle_32_3_4,
847bf215546Sopenharmony_ci   lp_rast_triangle_32_3_16,
848bf215546Sopenharmony_ci   lp_rast_triangle_32_4_16,
849bf215546Sopenharmony_ci   lp_rast_triangle_ms_1,
850bf215546Sopenharmony_ci   lp_rast_triangle_ms_2,
851bf215546Sopenharmony_ci   lp_rast_triangle_ms_3,
852bf215546Sopenharmony_ci   lp_rast_triangle_ms_4,
853bf215546Sopenharmony_ci   lp_rast_triangle_ms_5,
854bf215546Sopenharmony_ci   lp_rast_triangle_ms_6,
855bf215546Sopenharmony_ci   lp_rast_triangle_ms_7,
856bf215546Sopenharmony_ci   lp_rast_triangle_ms_8,
857bf215546Sopenharmony_ci   lp_rast_triangle_ms_3_4,
858bf215546Sopenharmony_ci   lp_rast_triangle_ms_3_16,
859bf215546Sopenharmony_ci   lp_rast_triangle_ms_4_16,
860bf215546Sopenharmony_ci   lp_rast_rectangle,
861bf215546Sopenharmony_ci   lp_rast_blit_tile,
862bf215546Sopenharmony_ci};
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci/* Debug rasterization with most fastpaths disabled.
866bf215546Sopenharmony_ci */
867bf215546Sopenharmony_cistatic const lp_rast_cmd_func
868bf215546Sopenharmony_cidispatch_tri_debug[] =
869bf215546Sopenharmony_ci{
870bf215546Sopenharmony_ci   lp_rast_clear_color,
871bf215546Sopenharmony_ci   lp_rast_clear_zstencil,
872bf215546Sopenharmony_ci   lp_rast_triangle_1,
873bf215546Sopenharmony_ci   lp_rast_triangle_2,
874bf215546Sopenharmony_ci   lp_rast_triangle_3,
875bf215546Sopenharmony_ci   lp_rast_triangle_4,
876bf215546Sopenharmony_ci   lp_rast_triangle_5,
877bf215546Sopenharmony_ci   lp_rast_triangle_6,
878bf215546Sopenharmony_ci   lp_rast_triangle_7,
879bf215546Sopenharmony_ci   lp_rast_triangle_8,
880bf215546Sopenharmony_ci   lp_rast_triangle_3_4,
881bf215546Sopenharmony_ci   lp_rast_triangle_3_16,
882bf215546Sopenharmony_ci   lp_rast_triangle_4_16,
883bf215546Sopenharmony_ci   lp_rast_shade_tile,
884bf215546Sopenharmony_ci   lp_rast_shade_tile,
885bf215546Sopenharmony_ci   lp_rast_begin_query,
886bf215546Sopenharmony_ci   lp_rast_end_query,
887bf215546Sopenharmony_ci   lp_rast_set_state,
888bf215546Sopenharmony_ci   lp_rast_triangle_32_1,
889bf215546Sopenharmony_ci   lp_rast_triangle_32_2,
890bf215546Sopenharmony_ci   lp_rast_triangle_32_3,
891bf215546Sopenharmony_ci   lp_rast_triangle_32_4,
892bf215546Sopenharmony_ci   lp_rast_triangle_32_5,
893bf215546Sopenharmony_ci   lp_rast_triangle_32_6,
894bf215546Sopenharmony_ci   lp_rast_triangle_32_7,
895bf215546Sopenharmony_ci   lp_rast_triangle_32_8,
896bf215546Sopenharmony_ci   lp_rast_triangle_32_3_4,
897bf215546Sopenharmony_ci   lp_rast_triangle_32_3_16,
898bf215546Sopenharmony_ci   lp_rast_triangle_32_4_16,
899bf215546Sopenharmony_ci   lp_rast_triangle_ms_1,
900bf215546Sopenharmony_ci   lp_rast_triangle_ms_2,
901bf215546Sopenharmony_ci   lp_rast_triangle_ms_3,
902bf215546Sopenharmony_ci   lp_rast_triangle_ms_4,
903bf215546Sopenharmony_ci   lp_rast_triangle_ms_5,
904bf215546Sopenharmony_ci   lp_rast_triangle_ms_6,
905bf215546Sopenharmony_ci   lp_rast_triangle_ms_7,
906bf215546Sopenharmony_ci   lp_rast_triangle_ms_8,
907bf215546Sopenharmony_ci   lp_rast_triangle_ms_3_4,
908bf215546Sopenharmony_ci   lp_rast_triangle_ms_3_16,
909bf215546Sopenharmony_ci   lp_rast_triangle_ms_4_16,
910bf215546Sopenharmony_ci   lp_rast_rectangle,
911bf215546Sopenharmony_ci   lp_rast_shade_tile,
912bf215546Sopenharmony_ci};
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci
915bf215546Sopenharmony_cistruct lp_bin_info
916bf215546Sopenharmony_cilp_characterize_bin(const struct cmd_bin *bin)
917bf215546Sopenharmony_ci{
918bf215546Sopenharmony_ci   unsigned andflags = ~0, j = 0;
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(rast_flags) == LP_RAST_OP_MAX);
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci   for (const struct cmd_block *block = bin->head; block; block = block->next) {
923bf215546Sopenharmony_ci      for (unsigned k = 0; k < block->count; k++, j++) {
924bf215546Sopenharmony_ci         andflags &= rast_flags[block->cmd[k]];
925bf215546Sopenharmony_ci      }
926bf215546Sopenharmony_ci   }
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_ci   struct lp_bin_info info;
929bf215546Sopenharmony_ci   info.type = andflags;
930bf215546Sopenharmony_ci   info.count = j;
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci   return info;
933bf215546Sopenharmony_ci}
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_ci
936bf215546Sopenharmony_cistatic void
937bf215546Sopenharmony_ciblit_rasterize_bin(struct lp_rasterizer_task *task,
938bf215546Sopenharmony_ci                   const struct cmd_bin *bin)
939bf215546Sopenharmony_ci{
940bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(dispatch_blit) == LP_RAST_OP_MAX);
941bf215546Sopenharmony_ci
942bf215546Sopenharmony_ci   if (0) debug_printf("%s\n", __FUNCTION__);
943bf215546Sopenharmony_ci   for (const struct cmd_block *block = bin->head; block; block = block->next) {
944bf215546Sopenharmony_ci      for (unsigned k = 0; k < block->count; k++) {
945bf215546Sopenharmony_ci         dispatch_blit[block->cmd[k]](task, block->arg[k]);
946bf215546Sopenharmony_ci      }
947bf215546Sopenharmony_ci   }
948bf215546Sopenharmony_ci}
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_cistatic void
952bf215546Sopenharmony_citri_rasterize_bin(struct lp_rasterizer_task *task,
953bf215546Sopenharmony_ci                  const struct cmd_bin *bin,
954bf215546Sopenharmony_ci                  int x, int y)
955bf215546Sopenharmony_ci{
956bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(dispatch_tri) == LP_RAST_OP_MAX);
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci   for (const struct cmd_block *block = bin->head; block; block = block->next) {
959bf215546Sopenharmony_ci      for (unsigned k = 0; k < block->count; k++) {
960bf215546Sopenharmony_ci         dispatch_tri[block->cmd[k]](task, block->arg[k]);
961bf215546Sopenharmony_ci      }
962bf215546Sopenharmony_ci   }
963bf215546Sopenharmony_ci}
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_cistatic void
967bf215546Sopenharmony_cidebug_rasterize_bin(struct lp_rasterizer_task *task,
968bf215546Sopenharmony_ci                  const struct cmd_bin *bin)
969bf215546Sopenharmony_ci{
970bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(dispatch_tri_debug) == LP_RAST_OP_MAX);
971bf215546Sopenharmony_ci
972bf215546Sopenharmony_ci   for (const struct cmd_block *block = bin->head; block; block = block->next) {
973bf215546Sopenharmony_ci      for (unsigned k = 0; k < block->count; k++) {
974bf215546Sopenharmony_ci         dispatch_tri_debug[block->cmd[k]](task, block->arg[k]);
975bf215546Sopenharmony_ci      }
976bf215546Sopenharmony_ci   }
977bf215546Sopenharmony_ci}
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_ci/**
981bf215546Sopenharmony_ci * Rasterize commands for a single bin.
982bf215546Sopenharmony_ci * \param x, y  position of the bin's tile in the framebuffer
983bf215546Sopenharmony_ci * Must be called between lp_rast_begin() and lp_rast_end().
984bf215546Sopenharmony_ci * Called per thread.
985bf215546Sopenharmony_ci */
986bf215546Sopenharmony_cistatic void
987bf215546Sopenharmony_cirasterize_bin(struct lp_rasterizer_task *task,
988bf215546Sopenharmony_ci              const struct cmd_bin *bin, int x, int y)
989bf215546Sopenharmony_ci{
990bf215546Sopenharmony_ci   struct lp_bin_info info = lp_characterize_bin(bin);
991bf215546Sopenharmony_ci
992bf215546Sopenharmony_ci   lp_rast_tile_begin(task, bin, x, y);
993bf215546Sopenharmony_ci
994bf215546Sopenharmony_ci   if (LP_DEBUG & DEBUG_NO_FASTPATH) {
995bf215546Sopenharmony_ci      debug_rasterize_bin(task, bin);
996bf215546Sopenharmony_ci   } else if (info.type & LP_RAST_FLAGS_BLIT) {
997bf215546Sopenharmony_ci      blit_rasterize_bin(task, bin);
998bf215546Sopenharmony_ci   } else if (task->scene->permit_linear_rasterizer &&
999bf215546Sopenharmony_ci            !(LP_PERF & PERF_NO_RAST_LINEAR) &&
1000bf215546Sopenharmony_ci            (info.type & LP_RAST_FLAGS_RECT)) {
1001bf215546Sopenharmony_ci      lp_linear_rasterize_bin(task, bin);
1002bf215546Sopenharmony_ci   } else {
1003bf215546Sopenharmony_ci      tri_rasterize_bin(task, bin, x, y);
1004bf215546Sopenharmony_ci   }
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_ci   lp_rast_tile_end(task);
1007bf215546Sopenharmony_ci
1008bf215546Sopenharmony_ci#ifdef DEBUG
1009bf215546Sopenharmony_ci   /* Debug/Perf flags:
1010bf215546Sopenharmony_ci    */
1011bf215546Sopenharmony_ci   if (bin->head->count == 1) {
1012bf215546Sopenharmony_ci      if (bin->head->cmd[0] == LP_RAST_OP_BLIT)
1013bf215546Sopenharmony_ci         LP_COUNT(nr_pure_blit_64);
1014bf215546Sopenharmony_ci      else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
1015bf215546Sopenharmony_ci         LP_COUNT(nr_pure_shade_opaque_64);
1016bf215546Sopenharmony_ci      else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
1017bf215546Sopenharmony_ci         LP_COUNT(nr_pure_shade_64);
1018bf215546Sopenharmony_ci   }
1019bf215546Sopenharmony_ci#endif
1020bf215546Sopenharmony_ci}
1021bf215546Sopenharmony_ci
1022bf215546Sopenharmony_ci
1023bf215546Sopenharmony_ci/* An empty bin is one that just loads the contents of the tile and
1024bf215546Sopenharmony_ci * stores them again unchanged.  This typically happens when bins have
1025bf215546Sopenharmony_ci * been flushed for some reason in the middle of a frame, or when
1026bf215546Sopenharmony_ci * incremental updates are being made to a render target.
1027bf215546Sopenharmony_ci *
1028bf215546Sopenharmony_ci * Try to avoid doing pointless work in this case.
1029bf215546Sopenharmony_ci */
1030bf215546Sopenharmony_cistatic boolean
1031bf215546Sopenharmony_ciis_empty_bin(const struct cmd_bin *bin)
1032bf215546Sopenharmony_ci{
1033bf215546Sopenharmony_ci   return bin->head == NULL;
1034bf215546Sopenharmony_ci}
1035bf215546Sopenharmony_ci
1036bf215546Sopenharmony_ci
1037bf215546Sopenharmony_ci/**
1038bf215546Sopenharmony_ci * Rasterize/execute all bins within a scene.
1039bf215546Sopenharmony_ci * Called per thread.
1040bf215546Sopenharmony_ci */
1041bf215546Sopenharmony_cistatic void
1042bf215546Sopenharmony_cirasterize_scene(struct lp_rasterizer_task *task,
1043bf215546Sopenharmony_ci                struct lp_scene *scene)
1044bf215546Sopenharmony_ci{
1045bf215546Sopenharmony_ci   task->scene = scene;
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci   /* Clear the cache tags. This should not always be necessary but
1048bf215546Sopenharmony_ci      simpler for now. */
1049bf215546Sopenharmony_ci#if LP_USE_TEXTURE_CACHE
1050bf215546Sopenharmony_ci   memset(task->thread_data.cache->cache_tags, 0,
1051bf215546Sopenharmony_ci          sizeof(task->thread_data.cache->cache_tags));
1052bf215546Sopenharmony_ci#if LP_BUILD_FORMAT_CACHE_DEBUG
1053bf215546Sopenharmony_ci   task->thread_data.cache->cache_access_total = 0;
1054bf215546Sopenharmony_ci   task->thread_data.cache->cache_access_miss = 0;
1055bf215546Sopenharmony_ci#endif
1056bf215546Sopenharmony_ci#endif
1057bf215546Sopenharmony_ci
1058bf215546Sopenharmony_ci   if (!task->rast->no_rast) {
1059bf215546Sopenharmony_ci      /* loop over scene bins, rasterize each */
1060bf215546Sopenharmony_ci      {
1061bf215546Sopenharmony_ci         struct cmd_bin *bin;
1062bf215546Sopenharmony_ci         int i, j;
1063bf215546Sopenharmony_ci
1064bf215546Sopenharmony_ci         assert(scene);
1065bf215546Sopenharmony_ci         while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
1066bf215546Sopenharmony_ci            if (!is_empty_bin(bin))
1067bf215546Sopenharmony_ci               rasterize_bin(task, bin, i, j);
1068bf215546Sopenharmony_ci         }
1069bf215546Sopenharmony_ci      }
1070bf215546Sopenharmony_ci   }
1071bf215546Sopenharmony_ci
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci#if LP_BUILD_FORMAT_CACHE_DEBUG
1074bf215546Sopenharmony_ci   {
1075bf215546Sopenharmony_ci      uint64_t total, miss;
1076bf215546Sopenharmony_ci      total = task->thread_data.cache->cache_access_total;
1077bf215546Sopenharmony_ci      miss = task->thread_data.cache->cache_access_miss;
1078bf215546Sopenharmony_ci      if (total) {
1079bf215546Sopenharmony_ci         debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
1080bf215546Sopenharmony_ci                 task->thread_index, (long long unsigned)total,
1081bf215546Sopenharmony_ci                 (long long unsigned)miss,
1082bf215546Sopenharmony_ci                 (float)(total - miss)/(float)total);
1083bf215546Sopenharmony_ci      }
1084bf215546Sopenharmony_ci   }
1085bf215546Sopenharmony_ci#endif
1086bf215546Sopenharmony_ci
1087bf215546Sopenharmony_ci   if (scene->fence) {
1088bf215546Sopenharmony_ci      lp_fence_signal(scene->fence);
1089bf215546Sopenharmony_ci   }
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci   task->scene = NULL;
1092bf215546Sopenharmony_ci}
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci
1095bf215546Sopenharmony_ci/**
1096bf215546Sopenharmony_ci * Called by setup module when it has something for us to render.
1097bf215546Sopenharmony_ci */
1098bf215546Sopenharmony_civoid
1099bf215546Sopenharmony_cilp_rast_queue_scene(struct lp_rasterizer *rast,
1100bf215546Sopenharmony_ci                    struct lp_scene *scene)
1101bf215546Sopenharmony_ci{
1102bf215546Sopenharmony_ci   LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
1103bf215546Sopenharmony_ci
1104bf215546Sopenharmony_ci   lp_fence_reference(&rast->last_fence, scene->fence);
1105bf215546Sopenharmony_ci   if (rast->last_fence)
1106bf215546Sopenharmony_ci      rast->last_fence->issued = TRUE;
1107bf215546Sopenharmony_ci
1108bf215546Sopenharmony_ci   if (rast->num_threads == 0) {
1109bf215546Sopenharmony_ci      /* no threading */
1110bf215546Sopenharmony_ci      unsigned fpstate = util_fpstate_get();
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci      /* Make sure that denorms are treated like zeros. This is
1113bf215546Sopenharmony_ci       * the behavior required by D3D10. OpenGL doesn't care.
1114bf215546Sopenharmony_ci       */
1115bf215546Sopenharmony_ci      util_fpstate_set_denorms_to_zero(fpstate);
1116bf215546Sopenharmony_ci
1117bf215546Sopenharmony_ci      lp_rast_begin(rast, scene);
1118bf215546Sopenharmony_ci
1119bf215546Sopenharmony_ci      rasterize_scene(&rast->tasks[0], scene);
1120bf215546Sopenharmony_ci
1121bf215546Sopenharmony_ci      lp_rast_end(rast);
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci      util_fpstate_set(fpstate);
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_ci      rast->curr_scene = NULL;
1126bf215546Sopenharmony_ci   }
1127bf215546Sopenharmony_ci   else {
1128bf215546Sopenharmony_ci      /* threaded rendering! */
1129bf215546Sopenharmony_ci      unsigned i;
1130bf215546Sopenharmony_ci
1131bf215546Sopenharmony_ci      lp_scene_enqueue(rast->full_scenes, scene);
1132bf215546Sopenharmony_ci
1133bf215546Sopenharmony_ci      /* signal the threads that there's work to do */
1134bf215546Sopenharmony_ci      for (i = 0; i < rast->num_threads; i++) {
1135bf215546Sopenharmony_ci         pipe_semaphore_signal(&rast->tasks[i].work_ready);
1136bf215546Sopenharmony_ci      }
1137bf215546Sopenharmony_ci   }
1138bf215546Sopenharmony_ci
1139bf215546Sopenharmony_ci   LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
1140bf215546Sopenharmony_ci}
1141bf215546Sopenharmony_ci
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_civoid
1144bf215546Sopenharmony_cilp_rast_finish(struct lp_rasterizer *rast)
1145bf215546Sopenharmony_ci{
1146bf215546Sopenharmony_ci   if (rast->num_threads == 0) {
1147bf215546Sopenharmony_ci      /* nothing to do */
1148bf215546Sopenharmony_ci   }
1149bf215546Sopenharmony_ci   else {
1150bf215546Sopenharmony_ci      int i;
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ci      /* wait for work to complete */
1153bf215546Sopenharmony_ci      for (i = 0; i < rast->num_threads; i++) {
1154bf215546Sopenharmony_ci         pipe_semaphore_wait(&rast->tasks[i].work_done);
1155bf215546Sopenharmony_ci      }
1156bf215546Sopenharmony_ci   }
1157bf215546Sopenharmony_ci}
1158bf215546Sopenharmony_ci
1159bf215546Sopenharmony_ci
1160bf215546Sopenharmony_ci/**
1161bf215546Sopenharmony_ci * This is the thread's main entrypoint.
1162bf215546Sopenharmony_ci * It's a simple loop:
1163bf215546Sopenharmony_ci *   1. wait for work
1164bf215546Sopenharmony_ci *   2. do work
1165bf215546Sopenharmony_ci *   3. signal that we're done
1166bf215546Sopenharmony_ci */
1167bf215546Sopenharmony_cistatic int
1168bf215546Sopenharmony_cithread_function(void *init_data)
1169bf215546Sopenharmony_ci{
1170bf215546Sopenharmony_ci   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
1171bf215546Sopenharmony_ci   struct lp_rasterizer *rast = task->rast;
1172bf215546Sopenharmony_ci   boolean debug = false;
1173bf215546Sopenharmony_ci   char thread_name[16];
1174bf215546Sopenharmony_ci
1175bf215546Sopenharmony_ci   snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index);
1176bf215546Sopenharmony_ci   u_thread_setname(thread_name);
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_ci   /* Make sure that denorms are treated like zeros. This is
1179bf215546Sopenharmony_ci    * the behavior required by D3D10. OpenGL doesn't care.
1180bf215546Sopenharmony_ci    */
1181bf215546Sopenharmony_ci   unsigned fpstate = util_fpstate_get();
1182bf215546Sopenharmony_ci   util_fpstate_set_denorms_to_zero(fpstate);
1183bf215546Sopenharmony_ci
1184bf215546Sopenharmony_ci   while (1) {
1185bf215546Sopenharmony_ci      /* wait for work */
1186bf215546Sopenharmony_ci      if (debug)
1187bf215546Sopenharmony_ci         debug_printf("thread %d waiting for work\n", task->thread_index);
1188bf215546Sopenharmony_ci      pipe_semaphore_wait(&task->work_ready);
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_ci      if (rast->exit_flag)
1191bf215546Sopenharmony_ci         break;
1192bf215546Sopenharmony_ci
1193bf215546Sopenharmony_ci      if (task->thread_index == 0) {
1194bf215546Sopenharmony_ci         /* thread[0]:
1195bf215546Sopenharmony_ci          *  - get next scene to rasterize
1196bf215546Sopenharmony_ci          *  - map the framebuffer surfaces
1197bf215546Sopenharmony_ci          */
1198bf215546Sopenharmony_ci         lp_rast_begin(rast, lp_scene_dequeue(rast->full_scenes, TRUE));
1199bf215546Sopenharmony_ci      }
1200bf215546Sopenharmony_ci
1201bf215546Sopenharmony_ci      /* Wait for all threads to get here so that threads[1+] don't
1202bf215546Sopenharmony_ci       * get a null rast->curr_scene pointer.
1203bf215546Sopenharmony_ci       */
1204bf215546Sopenharmony_ci      util_barrier_wait(&rast->barrier);
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci      /* do work */
1207bf215546Sopenharmony_ci      if (debug)
1208bf215546Sopenharmony_ci         debug_printf("thread %d doing work\n", task->thread_index);
1209bf215546Sopenharmony_ci
1210bf215546Sopenharmony_ci      rasterize_scene(task, rast->curr_scene);
1211bf215546Sopenharmony_ci
1212bf215546Sopenharmony_ci      /* wait for all threads to finish with this scene */
1213bf215546Sopenharmony_ci      util_barrier_wait(&rast->barrier);
1214bf215546Sopenharmony_ci
1215bf215546Sopenharmony_ci      /* XXX: shouldn't be necessary:
1216bf215546Sopenharmony_ci       */
1217bf215546Sopenharmony_ci      if (task->thread_index == 0) {
1218bf215546Sopenharmony_ci         lp_rast_end(rast);
1219bf215546Sopenharmony_ci      }
1220bf215546Sopenharmony_ci
1221bf215546Sopenharmony_ci      /* signal done with work */
1222bf215546Sopenharmony_ci      if (debug)
1223bf215546Sopenharmony_ci         debug_printf("thread %d done working\n", task->thread_index);
1224bf215546Sopenharmony_ci
1225bf215546Sopenharmony_ci      pipe_semaphore_signal(&task->work_done);
1226bf215546Sopenharmony_ci   }
1227bf215546Sopenharmony_ci
1228bf215546Sopenharmony_ci#ifdef _WIN32
1229bf215546Sopenharmony_ci   pipe_semaphore_signal(&task->work_done);
1230bf215546Sopenharmony_ci#endif
1231bf215546Sopenharmony_ci
1232bf215546Sopenharmony_ci   return 0;
1233bf215546Sopenharmony_ci}
1234bf215546Sopenharmony_ci
1235bf215546Sopenharmony_ci
1236bf215546Sopenharmony_ci/**
1237bf215546Sopenharmony_ci * Initialize semaphores and spawn the threads.
1238bf215546Sopenharmony_ci */
1239bf215546Sopenharmony_cistatic void
1240bf215546Sopenharmony_cicreate_rast_threads(struct lp_rasterizer *rast)
1241bf215546Sopenharmony_ci{
1242bf215546Sopenharmony_ci   /* NOTE: if num_threads is zero, we won't use any threads */
1243bf215546Sopenharmony_ci   for (unsigned i = 0; i < rast->num_threads; i++) {
1244bf215546Sopenharmony_ci      pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
1245bf215546Sopenharmony_ci      pipe_semaphore_init(&rast->tasks[i].work_done, 0);
1246bf215546Sopenharmony_ci      if (thrd_success != u_thread_create(rast->threads + i, thread_function,
1247bf215546Sopenharmony_ci                                            (void *) &rast->tasks[i])) {
1248bf215546Sopenharmony_ci         rast->num_threads = i; /* previous thread is max */
1249bf215546Sopenharmony_ci         break;
1250bf215546Sopenharmony_ci      }
1251bf215546Sopenharmony_ci   }
1252bf215546Sopenharmony_ci}
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci
1255bf215546Sopenharmony_ci/**
1256bf215546Sopenharmony_ci * Create new lp_rasterizer.  If num_threads is zero, don't create any
1257bf215546Sopenharmony_ci * new threads, do rendering synchronously.
1258bf215546Sopenharmony_ci * \param num_threads  number of rasterizer threads to create
1259bf215546Sopenharmony_ci */
1260bf215546Sopenharmony_cistruct lp_rasterizer *
1261bf215546Sopenharmony_cilp_rast_create(unsigned num_threads)
1262bf215546Sopenharmony_ci{
1263bf215546Sopenharmony_ci   struct lp_rasterizer *rast;
1264bf215546Sopenharmony_ci   unsigned i;
1265bf215546Sopenharmony_ci
1266bf215546Sopenharmony_ci   rast = CALLOC_STRUCT(lp_rasterizer);
1267bf215546Sopenharmony_ci   if (!rast) {
1268bf215546Sopenharmony_ci      goto no_rast;
1269bf215546Sopenharmony_ci   }
1270bf215546Sopenharmony_ci
1271bf215546Sopenharmony_ci   rast->full_scenes = lp_scene_queue_create();
1272bf215546Sopenharmony_ci   if (!rast->full_scenes) {
1273bf215546Sopenharmony_ci      goto no_full_scenes;
1274bf215546Sopenharmony_ci   }
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ci   for (i = 0; i < MAX2(1, num_threads); i++) {
1277bf215546Sopenharmony_ci      struct lp_rasterizer_task *task = &rast->tasks[i];
1278bf215546Sopenharmony_ci      task->rast = rast;
1279bf215546Sopenharmony_ci      task->thread_index = i;
1280bf215546Sopenharmony_ci      task->thread_data.cache =
1281bf215546Sopenharmony_ci         align_malloc(sizeof(struct lp_build_format_cache), 16);
1282bf215546Sopenharmony_ci      if (!task->thread_data.cache) {
1283bf215546Sopenharmony_ci         goto no_thread_data_cache;
1284bf215546Sopenharmony_ci      }
1285bf215546Sopenharmony_ci   }
1286bf215546Sopenharmony_ci
1287bf215546Sopenharmony_ci   rast->num_threads = num_threads;
1288bf215546Sopenharmony_ci
1289bf215546Sopenharmony_ci   rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
1290bf215546Sopenharmony_ci
1291bf215546Sopenharmony_ci   create_rast_threads(rast);
1292bf215546Sopenharmony_ci
1293bf215546Sopenharmony_ci   /* for synchronizing rasterization threads */
1294bf215546Sopenharmony_ci   if (rast->num_threads > 0) {
1295bf215546Sopenharmony_ci      util_barrier_init(&rast->barrier, rast->num_threads);
1296bf215546Sopenharmony_ci   }
1297bf215546Sopenharmony_ci
1298bf215546Sopenharmony_ci   memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
1299bf215546Sopenharmony_ci
1300bf215546Sopenharmony_ci   return rast;
1301bf215546Sopenharmony_ci
1302bf215546Sopenharmony_cino_thread_data_cache:
1303bf215546Sopenharmony_ci   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
1304bf215546Sopenharmony_ci      if (rast->tasks[i].thread_data.cache) {
1305bf215546Sopenharmony_ci         align_free(rast->tasks[i].thread_data.cache);
1306bf215546Sopenharmony_ci      }
1307bf215546Sopenharmony_ci   }
1308bf215546Sopenharmony_ci
1309bf215546Sopenharmony_ci   lp_scene_queue_destroy(rast->full_scenes);
1310bf215546Sopenharmony_cino_full_scenes:
1311bf215546Sopenharmony_ci   FREE(rast);
1312bf215546Sopenharmony_cino_rast:
1313bf215546Sopenharmony_ci   return NULL;
1314bf215546Sopenharmony_ci}
1315bf215546Sopenharmony_ci
1316bf215546Sopenharmony_ci
1317bf215546Sopenharmony_ci/* Shutdown:
1318bf215546Sopenharmony_ci */
1319bf215546Sopenharmony_civoid
1320bf215546Sopenharmony_cilp_rast_destroy(struct lp_rasterizer *rast)
1321bf215546Sopenharmony_ci{
1322bf215546Sopenharmony_ci   /* Set exit_flag and signal each thread's work_ready semaphore.
1323bf215546Sopenharmony_ci    * Each thread will be woken up, notice that the exit_flag is set and
1324bf215546Sopenharmony_ci    * break out of its main loop.  The thread will then exit.
1325bf215546Sopenharmony_ci    */
1326bf215546Sopenharmony_ci   rast->exit_flag = TRUE;
1327bf215546Sopenharmony_ci   for (unsigned i = 0; i < rast->num_threads; i++) {
1328bf215546Sopenharmony_ci      pipe_semaphore_signal(&rast->tasks[i].work_ready);
1329bf215546Sopenharmony_ci   }
1330bf215546Sopenharmony_ci
1331bf215546Sopenharmony_ci   /* Wait for threads to terminate before cleaning up per-thread data.
1332bf215546Sopenharmony_ci    * We don't actually call pipe_thread_wait to avoid dead lock on Windows
1333bf215546Sopenharmony_ci    * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */
1334bf215546Sopenharmony_ci   for (unsigned i = 0; i < rast->num_threads; i++) {
1335bf215546Sopenharmony_ci#ifdef _WIN32
1336bf215546Sopenharmony_ci      /* Threads might already be dead - Windows apparently terminates
1337bf215546Sopenharmony_ci       * other threads when returning from main.
1338bf215546Sopenharmony_ci       */
1339bf215546Sopenharmony_ci      DWORD exit_code = STILL_ACTIVE;
1340bf215546Sopenharmony_ci      if (GetExitCodeThread(rast->threads[i], &exit_code) &&
1341bf215546Sopenharmony_ci          exit_code == STILL_ACTIVE) {
1342bf215546Sopenharmony_ci         pipe_semaphore_wait(&rast->tasks[i].work_done);
1343bf215546Sopenharmony_ci      }
1344bf215546Sopenharmony_ci#else
1345bf215546Sopenharmony_ci      thrd_join(rast->threads[i], NULL);
1346bf215546Sopenharmony_ci#endif
1347bf215546Sopenharmony_ci   }
1348bf215546Sopenharmony_ci
1349bf215546Sopenharmony_ci   /* Clean up per-thread data */
1350bf215546Sopenharmony_ci   for (unsigned i = 0; i < rast->num_threads; i++) {
1351bf215546Sopenharmony_ci      pipe_semaphore_destroy(&rast->tasks[i].work_ready);
1352bf215546Sopenharmony_ci      pipe_semaphore_destroy(&rast->tasks[i].work_done);
1353bf215546Sopenharmony_ci   }
1354bf215546Sopenharmony_ci   for (unsigned i = 0; i < MAX2(1, rast->num_threads); i++) {
1355bf215546Sopenharmony_ci      align_free(rast->tasks[i].thread_data.cache);
1356bf215546Sopenharmony_ci   }
1357bf215546Sopenharmony_ci
1358bf215546Sopenharmony_ci   lp_fence_reference(&rast->last_fence, NULL);
1359bf215546Sopenharmony_ci
1360bf215546Sopenharmony_ci   /* for synchronizing rasterization threads */
1361bf215546Sopenharmony_ci   if (rast->num_threads > 0) {
1362bf215546Sopenharmony_ci      util_barrier_destroy(&rast->barrier);
1363bf215546Sopenharmony_ci   }
1364bf215546Sopenharmony_ci
1365bf215546Sopenharmony_ci   lp_scene_queue_destroy(rast->full_scenes);
1366bf215546Sopenharmony_ci
1367bf215546Sopenharmony_ci   FREE(rast);
1368bf215546Sopenharmony_ci}
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_civoid lp_rast_fence(struct lp_rasterizer *rast,
1371bf215546Sopenharmony_ci                   struct lp_fence **fence)
1372bf215546Sopenharmony_ci{
1373bf215546Sopenharmony_ci   if (fence)
1374bf215546Sopenharmony_ci      lp_fence_reference((struct lp_fence **)fence, rast->last_fence);
1375bf215546Sopenharmony_ci}
1376