1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2009 VMware, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the 8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 12bf215546Sopenharmony_ci * the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 16bf215546Sopenharmony_ci * of the Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci **************************************************************************/ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include <limits.h> 29bf215546Sopenharmony_ci#include "util/u_memory.h" 30bf215546Sopenharmony_ci#include "util/u_math.h" 31bf215546Sopenharmony_ci#include "util/u_rect.h" 32bf215546Sopenharmony_ci#include "util/u_surface.h" 33bf215546Sopenharmony_ci#include "util/u_pack_color.h" 34bf215546Sopenharmony_ci#include "util/u_string.h" 35bf215546Sopenharmony_ci#include "util/u_thread.h" 36bf215546Sopenharmony_ci#include "util/u_memset.h" 37bf215546Sopenharmony_ci#include "util/os_time.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#include "lp_scene_queue.h" 40bf215546Sopenharmony_ci#include "lp_context.h" 41bf215546Sopenharmony_ci#include "lp_debug.h" 42bf215546Sopenharmony_ci#include "lp_fence.h" 43bf215546Sopenharmony_ci#include "lp_perf.h" 44bf215546Sopenharmony_ci#include "lp_query.h" 45bf215546Sopenharmony_ci#include "lp_rast.h" 46bf215546Sopenharmony_ci#include "lp_rast_priv.h" 47bf215546Sopenharmony_ci#include "gallivm/lp_bld_format.h" 48bf215546Sopenharmony_ci#include "gallivm/lp_bld_debug.h" 49bf215546Sopenharmony_ci#include "lp_scene.h" 50bf215546Sopenharmony_ci#include "lp_screen.h" 51bf215546Sopenharmony_ci#include "lp_tex_sample.h" 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci#ifdef DEBUG 55bf215546Sopenharmony_ciint jit_line = 0; 56bf215546Sopenharmony_ciconst struct lp_rast_state *jit_state = NULL; 57bf215546Sopenharmony_ciconst struct lp_rasterizer_task *jit_task = NULL; 58bf215546Sopenharmony_ci#endif 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ciconst float lp_sample_pos_4x[4][2] = { { 0.375, 0.125 }, 61bf215546Sopenharmony_ci { 0.875, 0.375 }, 62bf215546Sopenharmony_ci { 0.125, 0.625 }, 63bf215546Sopenharmony_ci { 0.625, 0.875 } }; 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci/** 66bf215546Sopenharmony_ci * Begin rasterizing a scene. 67bf215546Sopenharmony_ci * Called once per scene by one thread. 68bf215546Sopenharmony_ci */ 69bf215546Sopenharmony_cistatic void 70bf215546Sopenharmony_cilp_rast_begin(struct lp_rasterizer *rast, 71bf215546Sopenharmony_ci struct lp_scene *scene) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci rast->curr_scene = scene; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci lp_scene_begin_rasterization(scene); 78bf215546Sopenharmony_ci lp_scene_bin_iter_begin(scene); 79bf215546Sopenharmony_ci} 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_cistatic void 83bf215546Sopenharmony_cilp_rast_end(struct lp_rasterizer *rast) 84bf215546Sopenharmony_ci{ 85bf215546Sopenharmony_ci rast->curr_scene = NULL; 86bf215546Sopenharmony_ci} 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci/** 90bf215546Sopenharmony_ci * Beginning rasterization of a tile. 91bf215546Sopenharmony_ci * \param x window X position of the tile, in pixels 92bf215546Sopenharmony_ci * \param y window Y position of the tile, in pixels 93bf215546Sopenharmony_ci */ 94bf215546Sopenharmony_cistatic void 95bf215546Sopenharmony_cilp_rast_tile_begin(struct lp_rasterizer_task *task, 96bf215546Sopenharmony_ci const struct cmd_bin *bin, 97bf215546Sopenharmony_ci int x, int y) 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci struct lp_scene *scene = task->scene; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci task->bin = bin; 104bf215546Sopenharmony_ci task->x = x * TILE_SIZE; 105bf215546Sopenharmony_ci task->y = y * TILE_SIZE; 106bf215546Sopenharmony_ci task->width = TILE_SIZE + x * TILE_SIZE > scene->fb.width ? 107bf215546Sopenharmony_ci scene->fb.width - x * TILE_SIZE : TILE_SIZE; 108bf215546Sopenharmony_ci task->height = TILE_SIZE + y * TILE_SIZE > scene->fb.height ? 109bf215546Sopenharmony_ci scene->fb.height - y * TILE_SIZE : TILE_SIZE; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci task->thread_data.vis_counter = 0; 112bf215546Sopenharmony_ci task->thread_data.ps_invocations = 0; 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci for (unsigned i = 0; i < scene->fb.nr_cbufs; i++) { 115bf215546Sopenharmony_ci if (scene->fb.cbufs[i]) { 116bf215546Sopenharmony_ci task->color_tiles[i] = scene->cbufs[i].map + 117bf215546Sopenharmony_ci scene->cbufs[i].stride * task->y + 118bf215546Sopenharmony_ci scene->cbufs[i].format_bytes * task->x; 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci if (scene->fb.zsbuf) { 122bf215546Sopenharmony_ci task->depth_tile = scene->zsbuf.map + 123bf215546Sopenharmony_ci scene->zsbuf.stride * task->y + 124bf215546Sopenharmony_ci scene->zsbuf.format_bytes * task->x; 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci} 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci/** 130bf215546Sopenharmony_ci * Clear the rasterizer's current color tile. 131bf215546Sopenharmony_ci * This is a bin command called during bin processing. 132bf215546Sopenharmony_ci * Clear commands always clear all bound layers. 133bf215546Sopenharmony_ci */ 134bf215546Sopenharmony_cistatic void 135bf215546Sopenharmony_cilp_rast_clear_color(struct lp_rasterizer_task *task, 136bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 137bf215546Sopenharmony_ci{ 138bf215546Sopenharmony_ci const struct lp_scene *scene = task->scene; 139bf215546Sopenharmony_ci const unsigned cbuf = arg.clear_rb->cbuf; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci /* we never bin clear commands for non-existing buffers */ 142bf215546Sopenharmony_ci assert(cbuf < scene->fb.nr_cbufs); 143bf215546Sopenharmony_ci assert(scene->fb.cbufs[cbuf]); 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci const enum pipe_format format = scene->fb.cbufs[cbuf]->format; 146bf215546Sopenharmony_ci union util_color uc = arg.clear_rb->color_val; 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci /* 149bf215546Sopenharmony_ci * this is pretty rough since we have target format (bunch of bytes...) 150bf215546Sopenharmony_ci * here. dump it as raw 4 dwords. 151bf215546Sopenharmony_ci */ 152bf215546Sopenharmony_ci LP_DBG(DEBUG_RAST, 153bf215546Sopenharmony_ci "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", 154bf215546Sopenharmony_ci __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci for (unsigned s = 0; s < scene->cbufs[cbuf].nr_samples; s++) { 157bf215546Sopenharmony_ci void *map = (char *) scene->cbufs[cbuf].map 158bf215546Sopenharmony_ci + scene->cbufs[cbuf].sample_stride * s; 159bf215546Sopenharmony_ci util_fill_box(map, 160bf215546Sopenharmony_ci format, 161bf215546Sopenharmony_ci scene->cbufs[cbuf].stride, 162bf215546Sopenharmony_ci scene->cbufs[cbuf].layer_stride, 163bf215546Sopenharmony_ci task->x, 164bf215546Sopenharmony_ci task->y, 165bf215546Sopenharmony_ci 0, 166bf215546Sopenharmony_ci task->width, 167bf215546Sopenharmony_ci task->height, 168bf215546Sopenharmony_ci scene->fb_max_layer + 1, 169bf215546Sopenharmony_ci &uc); 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci /* this will increase for each rb which probably doesn't mean much */ 173bf215546Sopenharmony_ci LP_COUNT(nr_color_tile_clear); 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci/** 178bf215546Sopenharmony_ci * Clear the rasterizer's current z/stencil tile. 179bf215546Sopenharmony_ci * This is a bin command called during bin processing. 180bf215546Sopenharmony_ci * Clear commands always clear all bound layers. 181bf215546Sopenharmony_ci */ 182bf215546Sopenharmony_cistatic void 183bf215546Sopenharmony_cilp_rast_clear_zstencil(struct lp_rasterizer_task *task, 184bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 185bf215546Sopenharmony_ci{ 186bf215546Sopenharmony_ci const struct lp_scene *scene = task->scene; 187bf215546Sopenharmony_ci uint64_t clear_value64 = arg.clear_zstencil.value; 188bf215546Sopenharmony_ci uint64_t clear_mask64 = arg.clear_zstencil.mask; 189bf215546Sopenharmony_ci uint32_t clear_value = (uint32_t) clear_value64; 190bf215546Sopenharmony_ci uint32_t clear_mask = (uint32_t) clear_mask64; 191bf215546Sopenharmony_ci const unsigned height = task->height; 192bf215546Sopenharmony_ci const unsigned width = task->width; 193bf215546Sopenharmony_ci const unsigned dst_stride = scene->zsbuf.stride; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", 196bf215546Sopenharmony_ci __FUNCTION__, clear_value, clear_mask); 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci /* 199bf215546Sopenharmony_ci * Clear the area of the depth/depth buffer matching this tile. 200bf215546Sopenharmony_ci */ 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci if (scene->fb.zsbuf) { 203bf215546Sopenharmony_ci for (unsigned s = 0; s < scene->zsbuf.nr_samples; s++) { 204bf215546Sopenharmony_ci uint8_t *dst_layer = 205bf215546Sopenharmony_ci task->depth_tile + (s * scene->zsbuf.sample_stride); 206bf215546Sopenharmony_ci const unsigned block_size = 207bf215546Sopenharmony_ci util_format_get_blocksize(scene->fb.zsbuf->format); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci clear_value &= clear_mask; 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci for (unsigned layer = 0; layer <= scene->fb_max_layer; layer++) { 212bf215546Sopenharmony_ci uint8_t *dst = dst_layer; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci switch (block_size) { 215bf215546Sopenharmony_ci case 1: 216bf215546Sopenharmony_ci assert(clear_mask == 0xff); 217bf215546Sopenharmony_ci for (unsigned i = 0; i < height; i++) { 218bf215546Sopenharmony_ci uint8_t *row = (uint8_t *)dst; 219bf215546Sopenharmony_ci memset(row, (uint8_t) clear_value, width); 220bf215546Sopenharmony_ci dst += dst_stride; 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci break; 223bf215546Sopenharmony_ci case 2: 224bf215546Sopenharmony_ci if (clear_mask == 0xffff) { 225bf215546Sopenharmony_ci for (unsigned i = 0; i < height; i++) { 226bf215546Sopenharmony_ci uint16_t *row = (uint16_t *)dst; 227bf215546Sopenharmony_ci for (unsigned j = 0; j < width; j++) 228bf215546Sopenharmony_ci *row++ = (uint16_t) clear_value; 229bf215546Sopenharmony_ci dst += dst_stride; 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci else { 233bf215546Sopenharmony_ci for (unsigned i = 0; i < height; i++) { 234bf215546Sopenharmony_ci uint16_t *row = (uint16_t *)dst; 235bf215546Sopenharmony_ci for (unsigned j = 0; j < width; j++) { 236bf215546Sopenharmony_ci uint16_t tmp = ~clear_mask & *row; 237bf215546Sopenharmony_ci *row++ = clear_value | tmp; 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci dst += dst_stride; 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci } 242bf215546Sopenharmony_ci break; 243bf215546Sopenharmony_ci case 4: 244bf215546Sopenharmony_ci if (clear_mask == 0xffffffff) { 245bf215546Sopenharmony_ci for (unsigned i = 0; i < height; i++) { 246bf215546Sopenharmony_ci util_memset32(dst, clear_value, width); 247bf215546Sopenharmony_ci dst += dst_stride; 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci else { 251bf215546Sopenharmony_ci for (unsigned i = 0; i < height; i++) { 252bf215546Sopenharmony_ci uint32_t *row = (uint32_t *)dst; 253bf215546Sopenharmony_ci for (unsigned j = 0; j < width; j++) { 254bf215546Sopenharmony_ci uint32_t tmp = ~clear_mask & *row; 255bf215546Sopenharmony_ci *row++ = clear_value | tmp; 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci dst += dst_stride; 258bf215546Sopenharmony_ci } 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci break; 261bf215546Sopenharmony_ci case 8: 262bf215546Sopenharmony_ci clear_value64 &= clear_mask64; 263bf215546Sopenharmony_ci if (clear_mask64 == 0xffffffffffULL) { 264bf215546Sopenharmony_ci for (unsigned i = 0; i < height; i++) { 265bf215546Sopenharmony_ci util_memset64(dst, clear_value64, width); 266bf215546Sopenharmony_ci dst += dst_stride; 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci else { 270bf215546Sopenharmony_ci for (unsigned i = 0; i < height; i++) { 271bf215546Sopenharmony_ci uint64_t *row = (uint64_t *)dst; 272bf215546Sopenharmony_ci for (unsigned j = 0; j < width; j++) { 273bf215546Sopenharmony_ci uint64_t tmp = ~clear_mask64 & *row; 274bf215546Sopenharmony_ci *row++ = clear_value64 | tmp; 275bf215546Sopenharmony_ci } 276bf215546Sopenharmony_ci dst += dst_stride; 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci break; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci default: 282bf215546Sopenharmony_ci assert(0); 283bf215546Sopenharmony_ci break; 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci dst_layer += scene->zsbuf.layer_stride; 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci } 288bf215546Sopenharmony_ci } 289bf215546Sopenharmony_ci} 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci/** 293bf215546Sopenharmony_ci * Run the shader on all blocks in a tile. This is used when a tile is 294bf215546Sopenharmony_ci * completely contained inside a triangle. 295bf215546Sopenharmony_ci * This is a bin command called during bin processing. 296bf215546Sopenharmony_ci */ 297bf215546Sopenharmony_cistatic void 298bf215546Sopenharmony_cilp_rast_shade_tile(struct lp_rasterizer_task *task, 299bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 300bf215546Sopenharmony_ci{ 301bf215546Sopenharmony_ci const struct lp_scene *scene = task->scene; 302bf215546Sopenharmony_ci const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 303bf215546Sopenharmony_ci const unsigned tile_x = task->x, tile_y = task->y; 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci if (inputs->disable) { 306bf215546Sopenharmony_ci /* This command was partially binned and has been disabled */ 307bf215546Sopenharmony_ci return; 308bf215546Sopenharmony_ci } 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci const struct lp_rast_state *state = task->state; 313bf215546Sopenharmony_ci assert(state); 314bf215546Sopenharmony_ci if (!state) { 315bf215546Sopenharmony_ci return; 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci const struct lp_fragment_shader_variant *variant = state->variant; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci /* render the whole 64x64 tile in 4x4 chunks */ 321bf215546Sopenharmony_ci for (unsigned y = 0; y < task->height; y += 4){ 322bf215546Sopenharmony_ci for (unsigned x = 0; x < task->width; x += 4) { 323bf215546Sopenharmony_ci /* color buffer */ 324bf215546Sopenharmony_ci uint8_t *color[PIPE_MAX_COLOR_BUFS]; 325bf215546Sopenharmony_ci unsigned stride[PIPE_MAX_COLOR_BUFS]; 326bf215546Sopenharmony_ci unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; 327bf215546Sopenharmony_ci for (unsigned i = 0; i < scene->fb.nr_cbufs; i++){ 328bf215546Sopenharmony_ci if (scene->fb.cbufs[i]) { 329bf215546Sopenharmony_ci stride[i] = scene->cbufs[i].stride; 330bf215546Sopenharmony_ci sample_stride[i] = scene->cbufs[i].sample_stride; 331bf215546Sopenharmony_ci color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, 332bf215546Sopenharmony_ci tile_y + y, 333bf215546Sopenharmony_ci inputs->layer + inputs->view_index); 334bf215546Sopenharmony_ci } 335bf215546Sopenharmony_ci else { 336bf215546Sopenharmony_ci stride[i] = 0; 337bf215546Sopenharmony_ci sample_stride[i] = 0; 338bf215546Sopenharmony_ci color[i] = NULL; 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci /* depth buffer */ 343bf215546Sopenharmony_ci uint8_t *depth = NULL; 344bf215546Sopenharmony_ci unsigned depth_stride = 0; 345bf215546Sopenharmony_ci unsigned depth_sample_stride = 0; 346bf215546Sopenharmony_ci if (scene->zsbuf.map) { 347bf215546Sopenharmony_ci depth = lp_rast_get_depth_block_pointer(task, tile_x + x, 348bf215546Sopenharmony_ci tile_y + y, 349bf215546Sopenharmony_ci inputs->layer + inputs->view_index); 350bf215546Sopenharmony_ci depth_stride = scene->zsbuf.stride; 351bf215546Sopenharmony_ci depth_sample_stride = scene->zsbuf.sample_stride; 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci uint64_t mask = 0; 355bf215546Sopenharmony_ci for (unsigned i = 0; i < scene->fb_max_samples; i++) 356bf215546Sopenharmony_ci mask |= (uint64_t)(0xffff) << (16 * i); 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci /* Propagate non-interpolated raster state. */ 359bf215546Sopenharmony_ci task->thread_data.raster_state.viewport_index = inputs->viewport_index; 360bf215546Sopenharmony_ci task->thread_data.raster_state.view_index = inputs->view_index; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci /* run shader on 4x4 block */ 363bf215546Sopenharmony_ci BEGIN_JIT_CALL(state, task); 364bf215546Sopenharmony_ci variant->jit_function[RAST_WHOLE](&state->jit_context, 365bf215546Sopenharmony_ci tile_x + x, tile_y + y, 366bf215546Sopenharmony_ci inputs->frontfacing, 367bf215546Sopenharmony_ci GET_A0(inputs), 368bf215546Sopenharmony_ci GET_DADX(inputs), 369bf215546Sopenharmony_ci GET_DADY(inputs), 370bf215546Sopenharmony_ci color, 371bf215546Sopenharmony_ci depth, 372bf215546Sopenharmony_ci mask, 373bf215546Sopenharmony_ci &task->thread_data, 374bf215546Sopenharmony_ci stride, 375bf215546Sopenharmony_ci depth_stride, 376bf215546Sopenharmony_ci sample_stride, 377bf215546Sopenharmony_ci depth_sample_stride); 378bf215546Sopenharmony_ci END_JIT_CALL(); 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci } 381bf215546Sopenharmony_ci} 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci/** 385bf215546Sopenharmony_ci * Run the shader on all blocks in a tile. This is used when a tile is 386bf215546Sopenharmony_ci * completely contained inside a triangle, and the shader is opaque. 387bf215546Sopenharmony_ci * This is a bin command called during bin processing. 388bf215546Sopenharmony_ci */ 389bf215546Sopenharmony_cistatic void 390bf215546Sopenharmony_cilp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, 391bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 392bf215546Sopenharmony_ci{ 393bf215546Sopenharmony_ci LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci assert(task->state); 396bf215546Sopenharmony_ci if (!task->state) { 397bf215546Sopenharmony_ci return; 398bf215546Sopenharmony_ci } 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci lp_rast_shade_tile(task, arg); 401bf215546Sopenharmony_ci} 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci/** 405bf215546Sopenharmony_ci * Compute shading for a 4x4 block of pixels inside a triangle. 406bf215546Sopenharmony_ci * This is a bin command called during bin processing. 407bf215546Sopenharmony_ci * \param x X position of quad in window coords 408bf215546Sopenharmony_ci * \param y Y position of quad in window coords 409bf215546Sopenharmony_ci */ 410bf215546Sopenharmony_civoid 411bf215546Sopenharmony_cilp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task, 412bf215546Sopenharmony_ci const struct lp_rast_shader_inputs *inputs, 413bf215546Sopenharmony_ci unsigned x, unsigned y, 414bf215546Sopenharmony_ci uint64_t mask) 415bf215546Sopenharmony_ci{ 416bf215546Sopenharmony_ci const struct lp_rast_state *state = task->state; 417bf215546Sopenharmony_ci const struct lp_fragment_shader_variant *variant = state->variant; 418bf215546Sopenharmony_ci const struct lp_scene *scene = task->scene; 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci assert(state); 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci /* Sanity checks */ 423bf215546Sopenharmony_ci assert(x < scene->tiles_x * TILE_SIZE); 424bf215546Sopenharmony_ci assert(y < scene->tiles_y * TILE_SIZE); 425bf215546Sopenharmony_ci assert(x % TILE_VECTOR_WIDTH == 0); 426bf215546Sopenharmony_ci assert(y % TILE_VECTOR_HEIGHT == 0); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci assert((x % 4) == 0); 429bf215546Sopenharmony_ci assert((y % 4) == 0); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci /* color buffer */ 432bf215546Sopenharmony_ci uint8_t *color[PIPE_MAX_COLOR_BUFS]; 433bf215546Sopenharmony_ci unsigned stride[PIPE_MAX_COLOR_BUFS]; 434bf215546Sopenharmony_ci unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; 435bf215546Sopenharmony_ci for (unsigned i = 0; i < scene->fb.nr_cbufs; i++) { 436bf215546Sopenharmony_ci if (scene->fb.cbufs[i]) { 437bf215546Sopenharmony_ci stride[i] = scene->cbufs[i].stride; 438bf215546Sopenharmony_ci sample_stride[i] = scene->cbufs[i].sample_stride; 439bf215546Sopenharmony_ci color[i] = lp_rast_get_color_block_pointer(task, i, x, y, 440bf215546Sopenharmony_ci inputs->layer + inputs->view_index); 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci else { 443bf215546Sopenharmony_ci stride[i] = 0; 444bf215546Sopenharmony_ci sample_stride[i] = 0; 445bf215546Sopenharmony_ci color[i] = NULL; 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci } 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci /* depth buffer */ 450bf215546Sopenharmony_ci uint8_t *depth = NULL; 451bf215546Sopenharmony_ci unsigned depth_stride = 0; 452bf215546Sopenharmony_ci unsigned depth_sample_stride = 0; 453bf215546Sopenharmony_ci if (scene->zsbuf.map) { 454bf215546Sopenharmony_ci depth_stride = scene->zsbuf.stride; 455bf215546Sopenharmony_ci depth_sample_stride = scene->zsbuf.sample_stride; 456bf215546Sopenharmony_ci depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index); 457bf215546Sopenharmony_ci } 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci /* 462bf215546Sopenharmony_ci * The rasterizer may produce fragments outside our 463bf215546Sopenharmony_ci * allocated 4x4 blocks hence need to filter them out here. 464bf215546Sopenharmony_ci */ 465bf215546Sopenharmony_ci if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { 466bf215546Sopenharmony_ci /* Propagate non-interpolated raster state. */ 467bf215546Sopenharmony_ci task->thread_data.raster_state.viewport_index = inputs->viewport_index; 468bf215546Sopenharmony_ci task->thread_data.raster_state.view_index = inputs->view_index; 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci /* run shader on 4x4 block */ 471bf215546Sopenharmony_ci BEGIN_JIT_CALL(state, task); 472bf215546Sopenharmony_ci variant->jit_function[RAST_EDGE_TEST](&state->jit_context, 473bf215546Sopenharmony_ci x, y, 474bf215546Sopenharmony_ci inputs->frontfacing, 475bf215546Sopenharmony_ci GET_A0(inputs), 476bf215546Sopenharmony_ci GET_DADX(inputs), 477bf215546Sopenharmony_ci GET_DADY(inputs), 478bf215546Sopenharmony_ci color, 479bf215546Sopenharmony_ci depth, 480bf215546Sopenharmony_ci mask, 481bf215546Sopenharmony_ci &task->thread_data, 482bf215546Sopenharmony_ci stride, 483bf215546Sopenharmony_ci depth_stride, 484bf215546Sopenharmony_ci sample_stride, 485bf215546Sopenharmony_ci depth_sample_stride); 486bf215546Sopenharmony_ci END_JIT_CALL(); 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci} 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_civoid 492bf215546Sopenharmony_cilp_rast_shade_quads_mask(struct lp_rasterizer_task *task, 493bf215546Sopenharmony_ci const struct lp_rast_shader_inputs *inputs, 494bf215546Sopenharmony_ci unsigned x, unsigned y, 495bf215546Sopenharmony_ci unsigned mask) 496bf215546Sopenharmony_ci{ 497bf215546Sopenharmony_ci uint64_t new_mask = 0; 498bf215546Sopenharmony_ci for (unsigned i = 0; i < task->scene->fb_max_samples; i++) 499bf215546Sopenharmony_ci new_mask |= ((uint64_t)mask) << (16 * i); 500bf215546Sopenharmony_ci lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask); 501bf215546Sopenharmony_ci} 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci/** 505bf215546Sopenharmony_ci * Directly copy pixels from a texture to the destination color buffer. 506bf215546Sopenharmony_ci * This is a bin command called during bin processing. 507bf215546Sopenharmony_ci */ 508bf215546Sopenharmony_cistatic void 509bf215546Sopenharmony_cilp_rast_blit_tile_to_dest(struct lp_rasterizer_task *task, 510bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 511bf215546Sopenharmony_ci{ 512bf215546Sopenharmony_ci const struct lp_scene *scene = task->scene; 513bf215546Sopenharmony_ci const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 514bf215546Sopenharmony_ci const struct lp_rast_state *state = task->state; 515bf215546Sopenharmony_ci struct lp_fragment_shader_variant *variant = state->variant; 516bf215546Sopenharmony_ci const struct lp_jit_texture *texture = &state->jit_context.textures[0]; 517bf215546Sopenharmony_ci struct pipe_surface *cbuf = scene->fb.cbufs[0]; 518bf215546Sopenharmony_ci const unsigned face_slice = cbuf->u.tex.first_layer; 519bf215546Sopenharmony_ci const unsigned level = cbuf->u.tex.level; 520bf215546Sopenharmony_ci struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci if (inputs->disable) { 525bf215546Sopenharmony_ci /* This command was partially binned and has been disabled */ 526bf215546Sopenharmony_ci return; 527bf215546Sopenharmony_ci } 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci uint8_t *dst = llvmpipe_get_texture_image_address(lpt, face_slice, level); 530bf215546Sopenharmony_ci if (!dst) 531bf215546Sopenharmony_ci return; 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci const unsigned dst_stride = lpt->row_stride[level]; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci const uint8_t *src = texture->base; 536bf215546Sopenharmony_ci const unsigned src_stride = texture->row_stride[0]; 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci int src_x = util_iround(GET_A0(inputs)[1][0]*texture->width - 0.5f); 539bf215546Sopenharmony_ci int src_y = util_iround(GET_A0(inputs)[1][1]*texture->height - 0.5f); 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci src_x += task->x; 542bf215546Sopenharmony_ci src_y += task->y; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci if (0) { 545bf215546Sopenharmony_ci union util_color uc; 546bf215546Sopenharmony_ci uc.ui[0] = 0xff0000ff; 547bf215546Sopenharmony_ci util_fill_rect(dst, 548bf215546Sopenharmony_ci cbuf->format, 549bf215546Sopenharmony_ci dst_stride, 550bf215546Sopenharmony_ci task->x, 551bf215546Sopenharmony_ci task->y, 552bf215546Sopenharmony_ci task->width, 553bf215546Sopenharmony_ci task->height, 554bf215546Sopenharmony_ci &uc); 555bf215546Sopenharmony_ci return; 556bf215546Sopenharmony_ci } 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci if (src_x >= 0 && 559bf215546Sopenharmony_ci src_y >= 0 && 560bf215546Sopenharmony_ci src_x + task->width <= texture->width && 561bf215546Sopenharmony_ci src_y + task->height <= texture->height) { 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA || 564bf215546Sopenharmony_ci (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 && 565bf215546Sopenharmony_ci cbuf->format == PIPE_FORMAT_B8G8R8X8_UNORM)) { 566bf215546Sopenharmony_ci util_copy_rect(dst, 567bf215546Sopenharmony_ci cbuf->format, 568bf215546Sopenharmony_ci dst_stride, 569bf215546Sopenharmony_ci task->x, task->y, 570bf215546Sopenharmony_ci task->width, task->height, 571bf215546Sopenharmony_ci src, src_stride, 572bf215546Sopenharmony_ci src_x, src_y); 573bf215546Sopenharmony_ci return; 574bf215546Sopenharmony_ci } 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1) { 577bf215546Sopenharmony_ci if (cbuf->format == PIPE_FORMAT_B8G8R8A8_UNORM) { 578bf215546Sopenharmony_ci dst += task->x * 4; 579bf215546Sopenharmony_ci src += src_x * 4; 580bf215546Sopenharmony_ci dst += task->y * dst_stride; 581bf215546Sopenharmony_ci src += src_y * src_stride; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci for (int y = 0; y < task->height; ++y) { 584bf215546Sopenharmony_ci const uint32_t *src_row = (const uint32_t *)src; 585bf215546Sopenharmony_ci uint32_t *dst_row = (uint32_t *)dst; 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci for (int x = 0; x < task->width; ++x) { 588bf215546Sopenharmony_ci *dst_row++ = *src_row++ | 0xff000000; 589bf215546Sopenharmony_ci } 590bf215546Sopenharmony_ci dst += dst_stride; 591bf215546Sopenharmony_ci src += src_stride; 592bf215546Sopenharmony_ci } 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci return; 595bf215546Sopenharmony_ci } 596bf215546Sopenharmony_ci } 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci } 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci /* 601bf215546Sopenharmony_ci * Fall back to the jit shaders. 602bf215546Sopenharmony_ci */ 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci lp_rast_shade_tile_opaque(task, arg); 605bf215546Sopenharmony_ci} 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_cistatic void 609bf215546Sopenharmony_cilp_rast_blit_tile(struct lp_rasterizer_task *task, 610bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 611bf215546Sopenharmony_ci{ 612bf215546Sopenharmony_ci /* This kindof just works, but isn't efficient: 613bf215546Sopenharmony_ci */ 614bf215546Sopenharmony_ci lp_rast_blit_tile_to_dest(task, arg); 615bf215546Sopenharmony_ci} 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci/** 619bf215546Sopenharmony_ci * Begin a new occlusion query. 620bf215546Sopenharmony_ci * This is a bin command put in all bins. 621bf215546Sopenharmony_ci * Called per thread. 622bf215546Sopenharmony_ci */ 623bf215546Sopenharmony_cistatic void 624bf215546Sopenharmony_cilp_rast_begin_query(struct lp_rasterizer_task *task, 625bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 626bf215546Sopenharmony_ci{ 627bf215546Sopenharmony_ci struct llvmpipe_query *pq = arg.query_obj; 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_ci switch (pq->type) { 630bf215546Sopenharmony_ci case PIPE_QUERY_OCCLUSION_COUNTER: 631bf215546Sopenharmony_ci case PIPE_QUERY_OCCLUSION_PREDICATE: 632bf215546Sopenharmony_ci case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 633bf215546Sopenharmony_ci pq->start[task->thread_index] = task->thread_data.vis_counter; 634bf215546Sopenharmony_ci break; 635bf215546Sopenharmony_ci case PIPE_QUERY_PIPELINE_STATISTICS: 636bf215546Sopenharmony_ci pq->start[task->thread_index] = task->thread_data.ps_invocations; 637bf215546Sopenharmony_ci break; 638bf215546Sopenharmony_ci case PIPE_QUERY_TIME_ELAPSED: 639bf215546Sopenharmony_ci pq->start[task->thread_index] = os_time_get_nano(); 640bf215546Sopenharmony_ci break; 641bf215546Sopenharmony_ci default: 642bf215546Sopenharmony_ci assert(0); 643bf215546Sopenharmony_ci break; 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci} 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci/** 649bf215546Sopenharmony_ci * End the current occlusion query. 650bf215546Sopenharmony_ci * This is a bin command put in all bins. 651bf215546Sopenharmony_ci * Called per thread. 652bf215546Sopenharmony_ci */ 653bf215546Sopenharmony_cistatic void 654bf215546Sopenharmony_cilp_rast_end_query(struct lp_rasterizer_task *task, 655bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 656bf215546Sopenharmony_ci{ 657bf215546Sopenharmony_ci struct llvmpipe_query *pq = arg.query_obj; 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci switch (pq->type) { 660bf215546Sopenharmony_ci case PIPE_QUERY_OCCLUSION_COUNTER: 661bf215546Sopenharmony_ci case PIPE_QUERY_OCCLUSION_PREDICATE: 662bf215546Sopenharmony_ci case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 663bf215546Sopenharmony_ci pq->end[task->thread_index] += 664bf215546Sopenharmony_ci task->thread_data.vis_counter - pq->start[task->thread_index]; 665bf215546Sopenharmony_ci pq->start[task->thread_index] = 0; 666bf215546Sopenharmony_ci break; 667bf215546Sopenharmony_ci case PIPE_QUERY_TIMESTAMP: 668bf215546Sopenharmony_ci case PIPE_QUERY_TIME_ELAPSED: 669bf215546Sopenharmony_ci pq->end[task->thread_index] = os_time_get_nano(); 670bf215546Sopenharmony_ci break; 671bf215546Sopenharmony_ci case PIPE_QUERY_PIPELINE_STATISTICS: 672bf215546Sopenharmony_ci pq->end[task->thread_index] += 673bf215546Sopenharmony_ci task->thread_data.ps_invocations - pq->start[task->thread_index]; 674bf215546Sopenharmony_ci pq->start[task->thread_index] = 0; 675bf215546Sopenharmony_ci break; 676bf215546Sopenharmony_ci default: 677bf215546Sopenharmony_ci assert(0); 678bf215546Sopenharmony_ci break; 679bf215546Sopenharmony_ci } 680bf215546Sopenharmony_ci} 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_civoid 684bf215546Sopenharmony_cilp_rast_set_state(struct lp_rasterizer_task *task, 685bf215546Sopenharmony_ci const union lp_rast_cmd_arg arg) 686bf215546Sopenharmony_ci{ 687bf215546Sopenharmony_ci task->state = arg.set_state; 688bf215546Sopenharmony_ci} 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci 691bf215546Sopenharmony_ci/** 692bf215546Sopenharmony_ci * Called when we're done writing to a color tile. 693bf215546Sopenharmony_ci */ 694bf215546Sopenharmony_cistatic void 695bf215546Sopenharmony_cilp_rast_tile_end(struct lp_rasterizer_task *task) 696bf215546Sopenharmony_ci{ 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci for (unsigned i = 0; i < task->scene->num_active_queries; ++i) { 699bf215546Sopenharmony_ci lp_rast_end_query(task, 700bf215546Sopenharmony_ci lp_rast_arg_query(task->scene->active_queries[i])); 701bf215546Sopenharmony_ci } 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci /* debug */ 704bf215546Sopenharmony_ci memset(task->color_tiles, 0, sizeof(task->color_tiles)); 705bf215546Sopenharmony_ci task->depth_tile = NULL; 706bf215546Sopenharmony_ci task->bin = NULL; 707bf215546Sopenharmony_ci} 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci/* Currently have two rendering paths only - the general case triangle 711bf215546Sopenharmony_ci * path and the super-specialized blit/clear path. 712bf215546Sopenharmony_ci */ 713bf215546Sopenharmony_ci#define TRI ((LP_RAST_FLAGS_TRI <<1)-1) /* general case */ 714bf215546Sopenharmony_ci#define RECT ((LP_RAST_FLAGS_RECT<<1)-1) /* direct rectangle rasterizer */ 715bf215546Sopenharmony_ci#define BLIT ((LP_RAST_FLAGS_BLIT<<1)-1) /* write direct-to-dest */ 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_cistatic const unsigned 718bf215546Sopenharmony_cirast_flags[] = { 719bf215546Sopenharmony_ci BLIT, /* clear color */ 720bf215546Sopenharmony_ci TRI, /* clear zstencil */ 721bf215546Sopenharmony_ci TRI, /* triangle_1 */ 722bf215546Sopenharmony_ci TRI, /* triangle_2 */ 723bf215546Sopenharmony_ci TRI, /* triangle_3 */ 724bf215546Sopenharmony_ci TRI, /* triangle_4 */ 725bf215546Sopenharmony_ci TRI, /* triangle_5 */ 726bf215546Sopenharmony_ci TRI, /* triangle_6 */ 727bf215546Sopenharmony_ci TRI, /* triangle_7 */ 728bf215546Sopenharmony_ci TRI, /* triangle_8 */ 729bf215546Sopenharmony_ci TRI, /* triangle_3_4 */ 730bf215546Sopenharmony_ci TRI, /* triangle_3_16 */ 731bf215546Sopenharmony_ci TRI, /* triangle_4_16 */ 732bf215546Sopenharmony_ci RECT, /* shade_tile */ 733bf215546Sopenharmony_ci RECT, /* shade_tile_opaque */ 734bf215546Sopenharmony_ci TRI, /* begin_query */ 735bf215546Sopenharmony_ci TRI, /* end_query */ 736bf215546Sopenharmony_ci BLIT, /* set_state, */ 737bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_1 */ 738bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_2 */ 739bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_3 */ 740bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_4 */ 741bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_5 */ 742bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_6 */ 743bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_7 */ 744bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_8 */ 745bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_3_4 */ 746bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_3_16 */ 747bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_32_4_16 */ 748bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_1 */ 749bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_2 */ 750bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_3 */ 751bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_4 */ 752bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_5 */ 753bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_6 */ 754bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_7 */ 755bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_8 */ 756bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_3_4 */ 757bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_3_16 */ 758bf215546Sopenharmony_ci TRI, /* lp_rast_triangle_ms_4_16 */ 759bf215546Sopenharmony_ci RECT, /* rectangle */ 760bf215546Sopenharmony_ci BLIT, /* blit */ 761bf215546Sopenharmony_ci}; 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci/* 764bf215546Sopenharmony_ci */ 765bf215546Sopenharmony_cistatic const lp_rast_cmd_func 766bf215546Sopenharmony_cidispatch_blit[] = { 767bf215546Sopenharmony_ci lp_rast_clear_color, 768bf215546Sopenharmony_ci NULL, /* clear_zstencil */ 769bf215546Sopenharmony_ci NULL, /* triangle_1 */ 770bf215546Sopenharmony_ci NULL, /* triangle_2 */ 771bf215546Sopenharmony_ci NULL, /* triangle_3 */ 772bf215546Sopenharmony_ci NULL, /* triangle_4 */ 773bf215546Sopenharmony_ci NULL, /* triangle_5 */ 774bf215546Sopenharmony_ci NULL, /* triangle_6 */ 775bf215546Sopenharmony_ci NULL, /* triangle_7 */ 776bf215546Sopenharmony_ci NULL, /* triangle_8 */ 777bf215546Sopenharmony_ci NULL, /* triangle_3_4 */ 778bf215546Sopenharmony_ci NULL, /* triangle_3_16 */ 779bf215546Sopenharmony_ci NULL, /* triangle_4_16 */ 780bf215546Sopenharmony_ci NULL, /* shade_tile */ 781bf215546Sopenharmony_ci NULL, /* shade_tile_opaque */ 782bf215546Sopenharmony_ci NULL, /* begin_query */ 783bf215546Sopenharmony_ci NULL, /* end_query */ 784bf215546Sopenharmony_ci lp_rast_set_state, /* set_state */ 785bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_1 */ 786bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_2 */ 787bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_3 */ 788bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_4 */ 789bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_5 */ 790bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_6 */ 791bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_7 */ 792bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_8 */ 793bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_3_4 */ 794bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_3_16 */ 795bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_32_4_16 */ 796bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_1 */ 797bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_2 */ 798bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_3 */ 799bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_4 */ 800bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_5 */ 801bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_6 */ 802bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_7 */ 803bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_8 */ 804bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_3_4 */ 805bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_3_16 */ 806bf215546Sopenharmony_ci NULL, /* lp_rast_triangle_ms_4_16 */ 807bf215546Sopenharmony_ci NULL, /* rectangle */ 808bf215546Sopenharmony_ci lp_rast_blit_tile_to_dest, 809bf215546Sopenharmony_ci}; 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci/* Triangle and general case rasterization: Use the SOA llvm shdaers, 814bf215546Sopenharmony_ci * an active swizzled tile for each color buf, etc. Don't blit/clear 815bf215546Sopenharmony_ci * directly to destination surface as we know there are swizzled 816bf215546Sopenharmony_ci * operations coming. 817bf215546Sopenharmony_ci */ 818bf215546Sopenharmony_cistatic const lp_rast_cmd_func 819bf215546Sopenharmony_cidispatch_tri[] = { 820bf215546Sopenharmony_ci lp_rast_clear_color, 821bf215546Sopenharmony_ci lp_rast_clear_zstencil, 822bf215546Sopenharmony_ci lp_rast_triangle_1, 823bf215546Sopenharmony_ci lp_rast_triangle_2, 824bf215546Sopenharmony_ci lp_rast_triangle_3, 825bf215546Sopenharmony_ci lp_rast_triangle_4, 826bf215546Sopenharmony_ci lp_rast_triangle_5, 827bf215546Sopenharmony_ci lp_rast_triangle_6, 828bf215546Sopenharmony_ci lp_rast_triangle_7, 829bf215546Sopenharmony_ci lp_rast_triangle_8, 830bf215546Sopenharmony_ci lp_rast_triangle_3_4, 831bf215546Sopenharmony_ci lp_rast_triangle_3_16, 832bf215546Sopenharmony_ci lp_rast_triangle_4_16, 833bf215546Sopenharmony_ci lp_rast_shade_tile, 834bf215546Sopenharmony_ci lp_rast_shade_tile_opaque, 835bf215546Sopenharmony_ci lp_rast_begin_query, 836bf215546Sopenharmony_ci lp_rast_end_query, 837bf215546Sopenharmony_ci lp_rast_set_state, 838bf215546Sopenharmony_ci lp_rast_triangle_32_1, 839bf215546Sopenharmony_ci lp_rast_triangle_32_2, 840bf215546Sopenharmony_ci lp_rast_triangle_32_3, 841bf215546Sopenharmony_ci lp_rast_triangle_32_4, 842bf215546Sopenharmony_ci lp_rast_triangle_32_5, 843bf215546Sopenharmony_ci lp_rast_triangle_32_6, 844bf215546Sopenharmony_ci lp_rast_triangle_32_7, 845bf215546Sopenharmony_ci lp_rast_triangle_32_8, 846bf215546Sopenharmony_ci lp_rast_triangle_32_3_4, 847bf215546Sopenharmony_ci lp_rast_triangle_32_3_16, 848bf215546Sopenharmony_ci lp_rast_triangle_32_4_16, 849bf215546Sopenharmony_ci lp_rast_triangle_ms_1, 850bf215546Sopenharmony_ci lp_rast_triangle_ms_2, 851bf215546Sopenharmony_ci lp_rast_triangle_ms_3, 852bf215546Sopenharmony_ci lp_rast_triangle_ms_4, 853bf215546Sopenharmony_ci lp_rast_triangle_ms_5, 854bf215546Sopenharmony_ci lp_rast_triangle_ms_6, 855bf215546Sopenharmony_ci lp_rast_triangle_ms_7, 856bf215546Sopenharmony_ci lp_rast_triangle_ms_8, 857bf215546Sopenharmony_ci lp_rast_triangle_ms_3_4, 858bf215546Sopenharmony_ci lp_rast_triangle_ms_3_16, 859bf215546Sopenharmony_ci lp_rast_triangle_ms_4_16, 860bf215546Sopenharmony_ci lp_rast_rectangle, 861bf215546Sopenharmony_ci lp_rast_blit_tile, 862bf215546Sopenharmony_ci}; 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci/* Debug rasterization with most fastpaths disabled. 866bf215546Sopenharmony_ci */ 867bf215546Sopenharmony_cistatic const lp_rast_cmd_func 868bf215546Sopenharmony_cidispatch_tri_debug[] = 869bf215546Sopenharmony_ci{ 870bf215546Sopenharmony_ci lp_rast_clear_color, 871bf215546Sopenharmony_ci lp_rast_clear_zstencil, 872bf215546Sopenharmony_ci lp_rast_triangle_1, 873bf215546Sopenharmony_ci lp_rast_triangle_2, 874bf215546Sopenharmony_ci lp_rast_triangle_3, 875bf215546Sopenharmony_ci lp_rast_triangle_4, 876bf215546Sopenharmony_ci lp_rast_triangle_5, 877bf215546Sopenharmony_ci lp_rast_triangle_6, 878bf215546Sopenharmony_ci lp_rast_triangle_7, 879bf215546Sopenharmony_ci lp_rast_triangle_8, 880bf215546Sopenharmony_ci lp_rast_triangle_3_4, 881bf215546Sopenharmony_ci lp_rast_triangle_3_16, 882bf215546Sopenharmony_ci lp_rast_triangle_4_16, 883bf215546Sopenharmony_ci lp_rast_shade_tile, 884bf215546Sopenharmony_ci lp_rast_shade_tile, 885bf215546Sopenharmony_ci lp_rast_begin_query, 886bf215546Sopenharmony_ci lp_rast_end_query, 887bf215546Sopenharmony_ci lp_rast_set_state, 888bf215546Sopenharmony_ci lp_rast_triangle_32_1, 889bf215546Sopenharmony_ci lp_rast_triangle_32_2, 890bf215546Sopenharmony_ci lp_rast_triangle_32_3, 891bf215546Sopenharmony_ci lp_rast_triangle_32_4, 892bf215546Sopenharmony_ci lp_rast_triangle_32_5, 893bf215546Sopenharmony_ci lp_rast_triangle_32_6, 894bf215546Sopenharmony_ci lp_rast_triangle_32_7, 895bf215546Sopenharmony_ci lp_rast_triangle_32_8, 896bf215546Sopenharmony_ci lp_rast_triangle_32_3_4, 897bf215546Sopenharmony_ci lp_rast_triangle_32_3_16, 898bf215546Sopenharmony_ci lp_rast_triangle_32_4_16, 899bf215546Sopenharmony_ci lp_rast_triangle_ms_1, 900bf215546Sopenharmony_ci lp_rast_triangle_ms_2, 901bf215546Sopenharmony_ci lp_rast_triangle_ms_3, 902bf215546Sopenharmony_ci lp_rast_triangle_ms_4, 903bf215546Sopenharmony_ci lp_rast_triangle_ms_5, 904bf215546Sopenharmony_ci lp_rast_triangle_ms_6, 905bf215546Sopenharmony_ci lp_rast_triangle_ms_7, 906bf215546Sopenharmony_ci lp_rast_triangle_ms_8, 907bf215546Sopenharmony_ci lp_rast_triangle_ms_3_4, 908bf215546Sopenharmony_ci lp_rast_triangle_ms_3_16, 909bf215546Sopenharmony_ci lp_rast_triangle_ms_4_16, 910bf215546Sopenharmony_ci lp_rast_rectangle, 911bf215546Sopenharmony_ci lp_rast_shade_tile, 912bf215546Sopenharmony_ci}; 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_cistruct lp_bin_info 916bf215546Sopenharmony_cilp_characterize_bin(const struct cmd_bin *bin) 917bf215546Sopenharmony_ci{ 918bf215546Sopenharmony_ci unsigned andflags = ~0, j = 0; 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(rast_flags) == LP_RAST_OP_MAX); 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci for (const struct cmd_block *block = bin->head; block; block = block->next) { 923bf215546Sopenharmony_ci for (unsigned k = 0; k < block->count; k++, j++) { 924bf215546Sopenharmony_ci andflags &= rast_flags[block->cmd[k]]; 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci } 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci struct lp_bin_info info; 929bf215546Sopenharmony_ci info.type = andflags; 930bf215546Sopenharmony_ci info.count = j; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci return info; 933bf215546Sopenharmony_ci} 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_cistatic void 937bf215546Sopenharmony_ciblit_rasterize_bin(struct lp_rasterizer_task *task, 938bf215546Sopenharmony_ci const struct cmd_bin *bin) 939bf215546Sopenharmony_ci{ 940bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(dispatch_blit) == LP_RAST_OP_MAX); 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci if (0) debug_printf("%s\n", __FUNCTION__); 943bf215546Sopenharmony_ci for (const struct cmd_block *block = bin->head; block; block = block->next) { 944bf215546Sopenharmony_ci for (unsigned k = 0; k < block->count; k++) { 945bf215546Sopenharmony_ci dispatch_blit[block->cmd[k]](task, block->arg[k]); 946bf215546Sopenharmony_ci } 947bf215546Sopenharmony_ci } 948bf215546Sopenharmony_ci} 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_cistatic void 952bf215546Sopenharmony_citri_rasterize_bin(struct lp_rasterizer_task *task, 953bf215546Sopenharmony_ci const struct cmd_bin *bin, 954bf215546Sopenharmony_ci int x, int y) 955bf215546Sopenharmony_ci{ 956bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(dispatch_tri) == LP_RAST_OP_MAX); 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci for (const struct cmd_block *block = bin->head; block; block = block->next) { 959bf215546Sopenharmony_ci for (unsigned k = 0; k < block->count; k++) { 960bf215546Sopenharmony_ci dispatch_tri[block->cmd[k]](task, block->arg[k]); 961bf215546Sopenharmony_ci } 962bf215546Sopenharmony_ci } 963bf215546Sopenharmony_ci} 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_cistatic void 967bf215546Sopenharmony_cidebug_rasterize_bin(struct lp_rasterizer_task *task, 968bf215546Sopenharmony_ci const struct cmd_bin *bin) 969bf215546Sopenharmony_ci{ 970bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(dispatch_tri_debug) == LP_RAST_OP_MAX); 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci for (const struct cmd_block *block = bin->head; block; block = block->next) { 973bf215546Sopenharmony_ci for (unsigned k = 0; k < block->count; k++) { 974bf215546Sopenharmony_ci dispatch_tri_debug[block->cmd[k]](task, block->arg[k]); 975bf215546Sopenharmony_ci } 976bf215546Sopenharmony_ci } 977bf215546Sopenharmony_ci} 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci/** 981bf215546Sopenharmony_ci * Rasterize commands for a single bin. 982bf215546Sopenharmony_ci * \param x, y position of the bin's tile in the framebuffer 983bf215546Sopenharmony_ci * Must be called between lp_rast_begin() and lp_rast_end(). 984bf215546Sopenharmony_ci * Called per thread. 985bf215546Sopenharmony_ci */ 986bf215546Sopenharmony_cistatic void 987bf215546Sopenharmony_cirasterize_bin(struct lp_rasterizer_task *task, 988bf215546Sopenharmony_ci const struct cmd_bin *bin, int x, int y) 989bf215546Sopenharmony_ci{ 990bf215546Sopenharmony_ci struct lp_bin_info info = lp_characterize_bin(bin); 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci lp_rast_tile_begin(task, bin, x, y); 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci if (LP_DEBUG & DEBUG_NO_FASTPATH) { 995bf215546Sopenharmony_ci debug_rasterize_bin(task, bin); 996bf215546Sopenharmony_ci } else if (info.type & LP_RAST_FLAGS_BLIT) { 997bf215546Sopenharmony_ci blit_rasterize_bin(task, bin); 998bf215546Sopenharmony_ci } else if (task->scene->permit_linear_rasterizer && 999bf215546Sopenharmony_ci !(LP_PERF & PERF_NO_RAST_LINEAR) && 1000bf215546Sopenharmony_ci (info.type & LP_RAST_FLAGS_RECT)) { 1001bf215546Sopenharmony_ci lp_linear_rasterize_bin(task, bin); 1002bf215546Sopenharmony_ci } else { 1003bf215546Sopenharmony_ci tri_rasterize_bin(task, bin, x, y); 1004bf215546Sopenharmony_ci } 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci lp_rast_tile_end(task); 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci#ifdef DEBUG 1009bf215546Sopenharmony_ci /* Debug/Perf flags: 1010bf215546Sopenharmony_ci */ 1011bf215546Sopenharmony_ci if (bin->head->count == 1) { 1012bf215546Sopenharmony_ci if (bin->head->cmd[0] == LP_RAST_OP_BLIT) 1013bf215546Sopenharmony_ci LP_COUNT(nr_pure_blit_64); 1014bf215546Sopenharmony_ci else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) 1015bf215546Sopenharmony_ci LP_COUNT(nr_pure_shade_opaque_64); 1016bf215546Sopenharmony_ci else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) 1017bf215546Sopenharmony_ci LP_COUNT(nr_pure_shade_64); 1018bf215546Sopenharmony_ci } 1019bf215546Sopenharmony_ci#endif 1020bf215546Sopenharmony_ci} 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci 1023bf215546Sopenharmony_ci/* An empty bin is one that just loads the contents of the tile and 1024bf215546Sopenharmony_ci * stores them again unchanged. This typically happens when bins have 1025bf215546Sopenharmony_ci * been flushed for some reason in the middle of a frame, or when 1026bf215546Sopenharmony_ci * incremental updates are being made to a render target. 1027bf215546Sopenharmony_ci * 1028bf215546Sopenharmony_ci * Try to avoid doing pointless work in this case. 1029bf215546Sopenharmony_ci */ 1030bf215546Sopenharmony_cistatic boolean 1031bf215546Sopenharmony_ciis_empty_bin(const struct cmd_bin *bin) 1032bf215546Sopenharmony_ci{ 1033bf215546Sopenharmony_ci return bin->head == NULL; 1034bf215546Sopenharmony_ci} 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_ci 1037bf215546Sopenharmony_ci/** 1038bf215546Sopenharmony_ci * Rasterize/execute all bins within a scene. 1039bf215546Sopenharmony_ci * Called per thread. 1040bf215546Sopenharmony_ci */ 1041bf215546Sopenharmony_cistatic void 1042bf215546Sopenharmony_cirasterize_scene(struct lp_rasterizer_task *task, 1043bf215546Sopenharmony_ci struct lp_scene *scene) 1044bf215546Sopenharmony_ci{ 1045bf215546Sopenharmony_ci task->scene = scene; 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci /* Clear the cache tags. This should not always be necessary but 1048bf215546Sopenharmony_ci simpler for now. */ 1049bf215546Sopenharmony_ci#if LP_USE_TEXTURE_CACHE 1050bf215546Sopenharmony_ci memset(task->thread_data.cache->cache_tags, 0, 1051bf215546Sopenharmony_ci sizeof(task->thread_data.cache->cache_tags)); 1052bf215546Sopenharmony_ci#if LP_BUILD_FORMAT_CACHE_DEBUG 1053bf215546Sopenharmony_ci task->thread_data.cache->cache_access_total = 0; 1054bf215546Sopenharmony_ci task->thread_data.cache->cache_access_miss = 0; 1055bf215546Sopenharmony_ci#endif 1056bf215546Sopenharmony_ci#endif 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci if (!task->rast->no_rast) { 1059bf215546Sopenharmony_ci /* loop over scene bins, rasterize each */ 1060bf215546Sopenharmony_ci { 1061bf215546Sopenharmony_ci struct cmd_bin *bin; 1062bf215546Sopenharmony_ci int i, j; 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci assert(scene); 1065bf215546Sopenharmony_ci while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { 1066bf215546Sopenharmony_ci if (!is_empty_bin(bin)) 1067bf215546Sopenharmony_ci rasterize_bin(task, bin, i, j); 1068bf215546Sopenharmony_ci } 1069bf215546Sopenharmony_ci } 1070bf215546Sopenharmony_ci } 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci#if LP_BUILD_FORMAT_CACHE_DEBUG 1074bf215546Sopenharmony_ci { 1075bf215546Sopenharmony_ci uint64_t total, miss; 1076bf215546Sopenharmony_ci total = task->thread_data.cache->cache_access_total; 1077bf215546Sopenharmony_ci miss = task->thread_data.cache->cache_access_miss; 1078bf215546Sopenharmony_ci if (total) { 1079bf215546Sopenharmony_ci debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", 1080bf215546Sopenharmony_ci task->thread_index, (long long unsigned)total, 1081bf215546Sopenharmony_ci (long long unsigned)miss, 1082bf215546Sopenharmony_ci (float)(total - miss)/(float)total); 1083bf215546Sopenharmony_ci } 1084bf215546Sopenharmony_ci } 1085bf215546Sopenharmony_ci#endif 1086bf215546Sopenharmony_ci 1087bf215546Sopenharmony_ci if (scene->fence) { 1088bf215546Sopenharmony_ci lp_fence_signal(scene->fence); 1089bf215546Sopenharmony_ci } 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci task->scene = NULL; 1092bf215546Sopenharmony_ci} 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_ci/** 1096bf215546Sopenharmony_ci * Called by setup module when it has something for us to render. 1097bf215546Sopenharmony_ci */ 1098bf215546Sopenharmony_civoid 1099bf215546Sopenharmony_cilp_rast_queue_scene(struct lp_rasterizer *rast, 1100bf215546Sopenharmony_ci struct lp_scene *scene) 1101bf215546Sopenharmony_ci{ 1102bf215546Sopenharmony_ci LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_ci lp_fence_reference(&rast->last_fence, scene->fence); 1105bf215546Sopenharmony_ci if (rast->last_fence) 1106bf215546Sopenharmony_ci rast->last_fence->issued = TRUE; 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_ci if (rast->num_threads == 0) { 1109bf215546Sopenharmony_ci /* no threading */ 1110bf215546Sopenharmony_ci unsigned fpstate = util_fpstate_get(); 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci /* Make sure that denorms are treated like zeros. This is 1113bf215546Sopenharmony_ci * the behavior required by D3D10. OpenGL doesn't care. 1114bf215546Sopenharmony_ci */ 1115bf215546Sopenharmony_ci util_fpstate_set_denorms_to_zero(fpstate); 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci lp_rast_begin(rast, scene); 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci rasterize_scene(&rast->tasks[0], scene); 1120bf215546Sopenharmony_ci 1121bf215546Sopenharmony_ci lp_rast_end(rast); 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci util_fpstate_set(fpstate); 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_ci rast->curr_scene = NULL; 1126bf215546Sopenharmony_ci } 1127bf215546Sopenharmony_ci else { 1128bf215546Sopenharmony_ci /* threaded rendering! */ 1129bf215546Sopenharmony_ci unsigned i; 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci lp_scene_enqueue(rast->full_scenes, scene); 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci /* signal the threads that there's work to do */ 1134bf215546Sopenharmony_ci for (i = 0; i < rast->num_threads; i++) { 1135bf215546Sopenharmony_ci pipe_semaphore_signal(&rast->tasks[i].work_ready); 1136bf215546Sopenharmony_ci } 1137bf215546Sopenharmony_ci } 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); 1140bf215546Sopenharmony_ci} 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_civoid 1144bf215546Sopenharmony_cilp_rast_finish(struct lp_rasterizer *rast) 1145bf215546Sopenharmony_ci{ 1146bf215546Sopenharmony_ci if (rast->num_threads == 0) { 1147bf215546Sopenharmony_ci /* nothing to do */ 1148bf215546Sopenharmony_ci } 1149bf215546Sopenharmony_ci else { 1150bf215546Sopenharmony_ci int i; 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci /* wait for work to complete */ 1153bf215546Sopenharmony_ci for (i = 0; i < rast->num_threads; i++) { 1154bf215546Sopenharmony_ci pipe_semaphore_wait(&rast->tasks[i].work_done); 1155bf215546Sopenharmony_ci } 1156bf215546Sopenharmony_ci } 1157bf215546Sopenharmony_ci} 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_ci/** 1161bf215546Sopenharmony_ci * This is the thread's main entrypoint. 1162bf215546Sopenharmony_ci * It's a simple loop: 1163bf215546Sopenharmony_ci * 1. wait for work 1164bf215546Sopenharmony_ci * 2. do work 1165bf215546Sopenharmony_ci * 3. signal that we're done 1166bf215546Sopenharmony_ci */ 1167bf215546Sopenharmony_cistatic int 1168bf215546Sopenharmony_cithread_function(void *init_data) 1169bf215546Sopenharmony_ci{ 1170bf215546Sopenharmony_ci struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; 1171bf215546Sopenharmony_ci struct lp_rasterizer *rast = task->rast; 1172bf215546Sopenharmony_ci boolean debug = false; 1173bf215546Sopenharmony_ci char thread_name[16]; 1174bf215546Sopenharmony_ci 1175bf215546Sopenharmony_ci snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index); 1176bf215546Sopenharmony_ci u_thread_setname(thread_name); 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci /* Make sure that denorms are treated like zeros. This is 1179bf215546Sopenharmony_ci * the behavior required by D3D10. OpenGL doesn't care. 1180bf215546Sopenharmony_ci */ 1181bf215546Sopenharmony_ci unsigned fpstate = util_fpstate_get(); 1182bf215546Sopenharmony_ci util_fpstate_set_denorms_to_zero(fpstate); 1183bf215546Sopenharmony_ci 1184bf215546Sopenharmony_ci while (1) { 1185bf215546Sopenharmony_ci /* wait for work */ 1186bf215546Sopenharmony_ci if (debug) 1187bf215546Sopenharmony_ci debug_printf("thread %d waiting for work\n", task->thread_index); 1188bf215546Sopenharmony_ci pipe_semaphore_wait(&task->work_ready); 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_ci if (rast->exit_flag) 1191bf215546Sopenharmony_ci break; 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci if (task->thread_index == 0) { 1194bf215546Sopenharmony_ci /* thread[0]: 1195bf215546Sopenharmony_ci * - get next scene to rasterize 1196bf215546Sopenharmony_ci * - map the framebuffer surfaces 1197bf215546Sopenharmony_ci */ 1198bf215546Sopenharmony_ci lp_rast_begin(rast, lp_scene_dequeue(rast->full_scenes, TRUE)); 1199bf215546Sopenharmony_ci } 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci /* Wait for all threads to get here so that threads[1+] don't 1202bf215546Sopenharmony_ci * get a null rast->curr_scene pointer. 1203bf215546Sopenharmony_ci */ 1204bf215546Sopenharmony_ci util_barrier_wait(&rast->barrier); 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci /* do work */ 1207bf215546Sopenharmony_ci if (debug) 1208bf215546Sopenharmony_ci debug_printf("thread %d doing work\n", task->thread_index); 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci rasterize_scene(task, rast->curr_scene); 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci /* wait for all threads to finish with this scene */ 1213bf215546Sopenharmony_ci util_barrier_wait(&rast->barrier); 1214bf215546Sopenharmony_ci 1215bf215546Sopenharmony_ci /* XXX: shouldn't be necessary: 1216bf215546Sopenharmony_ci */ 1217bf215546Sopenharmony_ci if (task->thread_index == 0) { 1218bf215546Sopenharmony_ci lp_rast_end(rast); 1219bf215546Sopenharmony_ci } 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci /* signal done with work */ 1222bf215546Sopenharmony_ci if (debug) 1223bf215546Sopenharmony_ci debug_printf("thread %d done working\n", task->thread_index); 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci pipe_semaphore_signal(&task->work_done); 1226bf215546Sopenharmony_ci } 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci#ifdef _WIN32 1229bf215546Sopenharmony_ci pipe_semaphore_signal(&task->work_done); 1230bf215546Sopenharmony_ci#endif 1231bf215546Sopenharmony_ci 1232bf215546Sopenharmony_ci return 0; 1233bf215546Sopenharmony_ci} 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_ci 1236bf215546Sopenharmony_ci/** 1237bf215546Sopenharmony_ci * Initialize semaphores and spawn the threads. 1238bf215546Sopenharmony_ci */ 1239bf215546Sopenharmony_cistatic void 1240bf215546Sopenharmony_cicreate_rast_threads(struct lp_rasterizer *rast) 1241bf215546Sopenharmony_ci{ 1242bf215546Sopenharmony_ci /* NOTE: if num_threads is zero, we won't use any threads */ 1243bf215546Sopenharmony_ci for (unsigned i = 0; i < rast->num_threads; i++) { 1244bf215546Sopenharmony_ci pipe_semaphore_init(&rast->tasks[i].work_ready, 0); 1245bf215546Sopenharmony_ci pipe_semaphore_init(&rast->tasks[i].work_done, 0); 1246bf215546Sopenharmony_ci if (thrd_success != u_thread_create(rast->threads + i, thread_function, 1247bf215546Sopenharmony_ci (void *) &rast->tasks[i])) { 1248bf215546Sopenharmony_ci rast->num_threads = i; /* previous thread is max */ 1249bf215546Sopenharmony_ci break; 1250bf215546Sopenharmony_ci } 1251bf215546Sopenharmony_ci } 1252bf215546Sopenharmony_ci} 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci/** 1256bf215546Sopenharmony_ci * Create new lp_rasterizer. If num_threads is zero, don't create any 1257bf215546Sopenharmony_ci * new threads, do rendering synchronously. 1258bf215546Sopenharmony_ci * \param num_threads number of rasterizer threads to create 1259bf215546Sopenharmony_ci */ 1260bf215546Sopenharmony_cistruct lp_rasterizer * 1261bf215546Sopenharmony_cilp_rast_create(unsigned num_threads) 1262bf215546Sopenharmony_ci{ 1263bf215546Sopenharmony_ci struct lp_rasterizer *rast; 1264bf215546Sopenharmony_ci unsigned i; 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_ci rast = CALLOC_STRUCT(lp_rasterizer); 1267bf215546Sopenharmony_ci if (!rast) { 1268bf215546Sopenharmony_ci goto no_rast; 1269bf215546Sopenharmony_ci } 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_ci rast->full_scenes = lp_scene_queue_create(); 1272bf215546Sopenharmony_ci if (!rast->full_scenes) { 1273bf215546Sopenharmony_ci goto no_full_scenes; 1274bf215546Sopenharmony_ci } 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci for (i = 0; i < MAX2(1, num_threads); i++) { 1277bf215546Sopenharmony_ci struct lp_rasterizer_task *task = &rast->tasks[i]; 1278bf215546Sopenharmony_ci task->rast = rast; 1279bf215546Sopenharmony_ci task->thread_index = i; 1280bf215546Sopenharmony_ci task->thread_data.cache = 1281bf215546Sopenharmony_ci align_malloc(sizeof(struct lp_build_format_cache), 16); 1282bf215546Sopenharmony_ci if (!task->thread_data.cache) { 1283bf215546Sopenharmony_ci goto no_thread_data_cache; 1284bf215546Sopenharmony_ci } 1285bf215546Sopenharmony_ci } 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_ci rast->num_threads = num_threads; 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE); 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_ci create_rast_threads(rast); 1292bf215546Sopenharmony_ci 1293bf215546Sopenharmony_ci /* for synchronizing rasterization threads */ 1294bf215546Sopenharmony_ci if (rast->num_threads > 0) { 1295bf215546Sopenharmony_ci util_barrier_init(&rast->barrier, rast->num_threads); 1296bf215546Sopenharmony_ci } 1297bf215546Sopenharmony_ci 1298bf215546Sopenharmony_ci memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); 1299bf215546Sopenharmony_ci 1300bf215546Sopenharmony_ci return rast; 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_cino_thread_data_cache: 1303bf215546Sopenharmony_ci for (i = 0; i < MAX2(1, rast->num_threads); i++) { 1304bf215546Sopenharmony_ci if (rast->tasks[i].thread_data.cache) { 1305bf215546Sopenharmony_ci align_free(rast->tasks[i].thread_data.cache); 1306bf215546Sopenharmony_ci } 1307bf215546Sopenharmony_ci } 1308bf215546Sopenharmony_ci 1309bf215546Sopenharmony_ci lp_scene_queue_destroy(rast->full_scenes); 1310bf215546Sopenharmony_cino_full_scenes: 1311bf215546Sopenharmony_ci FREE(rast); 1312bf215546Sopenharmony_cino_rast: 1313bf215546Sopenharmony_ci return NULL; 1314bf215546Sopenharmony_ci} 1315bf215546Sopenharmony_ci 1316bf215546Sopenharmony_ci 1317bf215546Sopenharmony_ci/* Shutdown: 1318bf215546Sopenharmony_ci */ 1319bf215546Sopenharmony_civoid 1320bf215546Sopenharmony_cilp_rast_destroy(struct lp_rasterizer *rast) 1321bf215546Sopenharmony_ci{ 1322bf215546Sopenharmony_ci /* Set exit_flag and signal each thread's work_ready semaphore. 1323bf215546Sopenharmony_ci * Each thread will be woken up, notice that the exit_flag is set and 1324bf215546Sopenharmony_ci * break out of its main loop. The thread will then exit. 1325bf215546Sopenharmony_ci */ 1326bf215546Sopenharmony_ci rast->exit_flag = TRUE; 1327bf215546Sopenharmony_ci for (unsigned i = 0; i < rast->num_threads; i++) { 1328bf215546Sopenharmony_ci pipe_semaphore_signal(&rast->tasks[i].work_ready); 1329bf215546Sopenharmony_ci } 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_ci /* Wait for threads to terminate before cleaning up per-thread data. 1332bf215546Sopenharmony_ci * We don't actually call pipe_thread_wait to avoid dead lock on Windows 1333bf215546Sopenharmony_ci * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ 1334bf215546Sopenharmony_ci for (unsigned i = 0; i < rast->num_threads; i++) { 1335bf215546Sopenharmony_ci#ifdef _WIN32 1336bf215546Sopenharmony_ci /* Threads might already be dead - Windows apparently terminates 1337bf215546Sopenharmony_ci * other threads when returning from main. 1338bf215546Sopenharmony_ci */ 1339bf215546Sopenharmony_ci DWORD exit_code = STILL_ACTIVE; 1340bf215546Sopenharmony_ci if (GetExitCodeThread(rast->threads[i], &exit_code) && 1341bf215546Sopenharmony_ci exit_code == STILL_ACTIVE) { 1342bf215546Sopenharmony_ci pipe_semaphore_wait(&rast->tasks[i].work_done); 1343bf215546Sopenharmony_ci } 1344bf215546Sopenharmony_ci#else 1345bf215546Sopenharmony_ci thrd_join(rast->threads[i], NULL); 1346bf215546Sopenharmony_ci#endif 1347bf215546Sopenharmony_ci } 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci /* Clean up per-thread data */ 1350bf215546Sopenharmony_ci for (unsigned i = 0; i < rast->num_threads; i++) { 1351bf215546Sopenharmony_ci pipe_semaphore_destroy(&rast->tasks[i].work_ready); 1352bf215546Sopenharmony_ci pipe_semaphore_destroy(&rast->tasks[i].work_done); 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX2(1, rast->num_threads); i++) { 1355bf215546Sopenharmony_ci align_free(rast->tasks[i].thread_data.cache); 1356bf215546Sopenharmony_ci } 1357bf215546Sopenharmony_ci 1358bf215546Sopenharmony_ci lp_fence_reference(&rast->last_fence, NULL); 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci /* for synchronizing rasterization threads */ 1361bf215546Sopenharmony_ci if (rast->num_threads > 0) { 1362bf215546Sopenharmony_ci util_barrier_destroy(&rast->barrier); 1363bf215546Sopenharmony_ci } 1364bf215546Sopenharmony_ci 1365bf215546Sopenharmony_ci lp_scene_queue_destroy(rast->full_scenes); 1366bf215546Sopenharmony_ci 1367bf215546Sopenharmony_ci FREE(rast); 1368bf215546Sopenharmony_ci} 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_civoid lp_rast_fence(struct lp_rasterizer *rast, 1371bf215546Sopenharmony_ci struct lp_fence **fence) 1372bf215546Sopenharmony_ci{ 1373bf215546Sopenharmony_ci if (fence) 1374bf215546Sopenharmony_ci lp_fence_reference((struct lp_fence **)fence, rast->last_fence); 1375bf215546Sopenharmony_ci} 1376