/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci **************************************************************************/ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* 29bf215546Sopenharmony_ci * Binning code for triangles 30bf215546Sopenharmony_ci */ 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include "util/u_math.h" 33bf215546Sopenharmony_ci#include "util/u_memory.h" 34bf215546Sopenharmony_ci#include "util/u_rect.h" 35bf215546Sopenharmony_ci#include "util/u_sse.h" 36bf215546Sopenharmony_ci#include "lp_perf.h" 37bf215546Sopenharmony_ci#include "lp_setup_context.h" 38bf215546Sopenharmony_ci#include "lp_rast.h" 39bf215546Sopenharmony_ci#include "lp_state_fs.h" 40bf215546Sopenharmony_ci#include "lp_state_setup.h" 41bf215546Sopenharmony_ci#include "lp_context.h" 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#include <inttypes.h> 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci#if defined(PIPE_ARCH_SSE) 47bf215546Sopenharmony_ci#include <emmintrin.h> 48bf215546Sopenharmony_ci#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN 49bf215546Sopenharmony_ci#include <altivec.h> 50bf215546Sopenharmony_ci#include "util/u_pwr8.h" 51bf215546Sopenharmony_ci#endif 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci#if !defined(PIPE_ARCH_SSE) 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_cistatic inline int 56bf215546Sopenharmony_cisubpixel_snap(float a) 57bf215546Sopenharmony_ci{ 58bf215546Sopenharmony_ci return util_iround(FIXED_ONE * a); 59bf215546Sopenharmony_ci} 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci#endif 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci/* Position and area in fixed point coordinates */ 64bf215546Sopenharmony_cistruct fixed_position { 65bf215546Sopenharmony_ci int32_t x[4]; 66bf215546Sopenharmony_ci int32_t y[4]; 67bf215546Sopenharmony_ci int32_t dx01; 68bf215546Sopenharmony_ci int32_t dy01; 69bf215546Sopenharmony_ci int32_t dx20; 70bf215546Sopenharmony_ci int32_t dy20; 
71bf215546Sopenharmony_ci}; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci/** 75bf215546Sopenharmony_ci * Alloc space for a new triangle plus the input.a0/dadx/dady arrays 76bf215546Sopenharmony_ci * immediately after it. 77bf215546Sopenharmony_ci * The memory is allocated from the per-scene pool, not per-tile. 78bf215546Sopenharmony_ci * \param tri_size returns number of bytes allocated 79bf215546Sopenharmony_ci * \param num_inputs number of fragment shader inputs 80bf215546Sopenharmony_ci * \return pointer to triangle space 81bf215546Sopenharmony_ci */ 82bf215546Sopenharmony_cistruct lp_rast_triangle * 83bf215546Sopenharmony_cilp_setup_alloc_triangle(struct lp_scene *scene, 84bf215546Sopenharmony_ci unsigned nr_inputs, 85bf215546Sopenharmony_ci unsigned nr_planes, 86bf215546Sopenharmony_ci unsigned *tri_size) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci // add 1 for XYZW position 89bf215546Sopenharmony_ci unsigned input_array_sz = (nr_inputs + 1) * sizeof(float[4]); 90bf215546Sopenharmony_ci unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0); 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci *tri_size = (sizeof(struct lp_rast_triangle) + 95bf215546Sopenharmony_ci 3 * input_array_sz + // 3 = da + dadx + dady 96bf215546Sopenharmony_ci plane_sz); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci struct lp_rast_triangle *tri = lp_scene_alloc_aligned(scene, *tri_size, 16); 99bf215546Sopenharmony_ci if (!tri) 100bf215546Sopenharmony_ci return NULL; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci tri->inputs.stride = input_array_sz; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci { 105bf215546Sopenharmony_ci ASSERTED char *a = (char *)tri; 106bf215546Sopenharmony_ci ASSERTED char *b = (char *)&GET_PLANES(tri)[nr_planes]; 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci assert(b - a == *tri_size); 
109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci return tri; 112bf215546Sopenharmony_ci} 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_civoid 115bf215546Sopenharmony_cilp_setup_print_vertex(struct lp_setup_context *setup, 116bf215546Sopenharmony_ci const char *name, 117bf215546Sopenharmony_ci const float (*v)[4]) 118bf215546Sopenharmony_ci{ 119bf215546Sopenharmony_ci const struct lp_setup_variant_key *key = &setup->setup.variant->key; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", 122bf215546Sopenharmony_ci name, 123bf215546Sopenharmony_ci v[0][0], v[0][1], v[0][2], v[0][3]); 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci for (int i = 0; i < key->num_inputs; i++) { 126bf215546Sopenharmony_ci const float *in = v[key->inputs[i].src_index]; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", 129bf215546Sopenharmony_ci i, 130bf215546Sopenharmony_ci name, key->inputs[i].src_index, 131bf215546Sopenharmony_ci (key->inputs[i].usage_mask & 0x1) ? "x" : " ", 132bf215546Sopenharmony_ci (key->inputs[i].usage_mask & 0x2) ? "y" : " ", 133bf215546Sopenharmony_ci (key->inputs[i].usage_mask & 0x4) ? "z" : " ", 134bf215546Sopenharmony_ci (key->inputs[i].usage_mask & 0x8) ? "w" : " "); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci for (int j = 0; j < 4; j++) 137bf215546Sopenharmony_ci if (key->inputs[i].usage_mask & (1<<j)) 138bf215546Sopenharmony_ci debug_printf("%.5f ", in[j]); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci debug_printf("\n"); 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci} 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci/** 146bf215546Sopenharmony_ci * Print triangle vertex attribs (for debug). 
147bf215546Sopenharmony_ci */ 148bf215546Sopenharmony_civoid 149bf215546Sopenharmony_cilp_setup_print_triangle(struct lp_setup_context *setup, 150bf215546Sopenharmony_ci const float (*v0)[4], 151bf215546Sopenharmony_ci const float (*v1)[4], 152bf215546Sopenharmony_ci const float (*v2)[4]) 153bf215546Sopenharmony_ci{ 154bf215546Sopenharmony_ci debug_printf("triangle\n"); 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci { 157bf215546Sopenharmony_ci const float ex = v0[0][0] - v2[0][0]; 158bf215546Sopenharmony_ci const float ey = v0[0][1] - v2[0][1]; 159bf215546Sopenharmony_ci const float fx = v1[0][0] - v2[0][0]; 160bf215546Sopenharmony_ci const float fy = v1[0][1] - v2[0][1]; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci /* det = cross(e,f).z */ 163bf215546Sopenharmony_ci const float det = ex * fy - ey * fx; 164bf215546Sopenharmony_ci if (det < 0.0f) 165bf215546Sopenharmony_ci debug_printf(" - ccw\n"); 166bf215546Sopenharmony_ci else if (det > 0.0f) 167bf215546Sopenharmony_ci debug_printf(" - cw\n"); 168bf215546Sopenharmony_ci else 169bf215546Sopenharmony_ci debug_printf(" - zero area\n"); 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci lp_setup_print_vertex(setup, "v0", v0); 173bf215546Sopenharmony_ci lp_setup_print_vertex(setup, "v1", v1); 174bf215546Sopenharmony_ci lp_setup_print_vertex(setup, "v2", v2); 175bf215546Sopenharmony_ci} 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci#define MAX_PLANES 8 179bf215546Sopenharmony_cistatic unsigned 180bf215546Sopenharmony_cilp_rast_tri_tab[MAX_PLANES+1] = { 181bf215546Sopenharmony_ci 0, /* should be impossible */ 182bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_1, 183bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_2, 184bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_3, 185bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_4, 186bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_5, 187bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_6, 188bf215546Sopenharmony_ci 
LP_RAST_OP_TRIANGLE_7, 189bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_8 190bf215546Sopenharmony_ci}; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_cistatic unsigned 193bf215546Sopenharmony_cilp_rast_32_tri_tab[MAX_PLANES+1] = { 194bf215546Sopenharmony_ci 0, /* should be impossible */ 195bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_1, 196bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_2, 197bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_3, 198bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_4, 199bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_5, 200bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_6, 201bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_7, 202bf215546Sopenharmony_ci LP_RAST_OP_TRIANGLE_32_8 203bf215546Sopenharmony_ci}; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_cistatic unsigned 207bf215546Sopenharmony_cilp_rast_ms_tri_tab[MAX_PLANES+1] = { 208bf215546Sopenharmony_ci 0, /* should be impossible */ 209bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_1, 210bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_2, 211bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_3, 212bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_4, 213bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_5, 214bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_6, 215bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_7, 216bf215546Sopenharmony_ci LP_RAST_OP_MS_TRIANGLE_8 217bf215546Sopenharmony_ci}; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci/* 221bf215546Sopenharmony_ci * Detect big primitives drawn with an alpha == 1.0. 222bf215546Sopenharmony_ci * 223bf215546Sopenharmony_ci * This is used when simulating anti-aliasing primitives in shaders, e.g., 224bf215546Sopenharmony_ci * when drawing the windows client area in Aero's flip-3d effect. 
225bf215546Sopenharmony_ci */ 226bf215546Sopenharmony_cistatic boolean 227bf215546Sopenharmony_cicheck_opaque(const struct lp_setup_context *setup, 228bf215546Sopenharmony_ci const float (*v1)[4], 229bf215546Sopenharmony_ci const float (*v2)[4], 230bf215546Sopenharmony_ci const float (*v3)[4]) 231bf215546Sopenharmony_ci{ 232bf215546Sopenharmony_ci const struct lp_fragment_shader_variant *variant = 233bf215546Sopenharmony_ci setup->fs.current.variant; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci if (variant->opaque) 236bf215546Sopenharmony_ci return TRUE; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci if (!variant->potentially_opaque) 239bf215546Sopenharmony_ci return FALSE; 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci const struct lp_tgsi_channel_info *alpha_info = &variant->shader->info.cbuf[0][3]; 242bf215546Sopenharmony_ci if (alpha_info->file == TGSI_FILE_CONSTANT) { 243bf215546Sopenharmony_ci const float *constants = setup->fs.current.jit_context.constants[0]; 244bf215546Sopenharmony_ci float alpha = constants[alpha_info->u.index*4 + 245bf215546Sopenharmony_ci alpha_info->swizzle]; 246bf215546Sopenharmony_ci return alpha == 1.0f; 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci if (alpha_info->file == TGSI_FILE_INPUT) { 250bf215546Sopenharmony_ci return (v1[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f && 251bf215546Sopenharmony_ci v2[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f && 252bf215546Sopenharmony_ci v3[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f); 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci return FALSE; 256bf215546Sopenharmony_ci} 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci/** 260bf215546Sopenharmony_ci * Do basic setup for triangle rasterization and determine which 261bf215546Sopenharmony_ci * framebuffer tiles are touched. 
Put the triangle in the scene's 262bf215546Sopenharmony_ci * bins for the tiles which we overlap. 263bf215546Sopenharmony_ci */ 264bf215546Sopenharmony_cistatic boolean 265bf215546Sopenharmony_cido_triangle_ccw(struct lp_setup_context *setup, 266bf215546Sopenharmony_ci struct fixed_position *position, 267bf215546Sopenharmony_ci const float (*v0)[4], 268bf215546Sopenharmony_ci const float (*v1)[4], 269bf215546Sopenharmony_ci const float (*v2)[4], 270bf215546Sopenharmony_ci boolean frontfacing) 271bf215546Sopenharmony_ci{ 272bf215546Sopenharmony_ci struct lp_scene *scene = setup->scene; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci if (0) 275bf215546Sopenharmony_ci lp_setup_print_triangle(setup, v0, v1, v2); 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci const float (*pv)[4]; 278bf215546Sopenharmony_ci if (setup->flatshade_first) { 279bf215546Sopenharmony_ci pv = v0; 280bf215546Sopenharmony_ci } else { 281bf215546Sopenharmony_ci pv = v2; 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci unsigned viewport_index = 0; 285bf215546Sopenharmony_ci if (setup->viewport_index_slot > 0) { 286bf215546Sopenharmony_ci unsigned *udata = (unsigned*)pv[setup->viewport_index_slot]; 287bf215546Sopenharmony_ci viewport_index = lp_clamp_viewport_idx(*udata); 288bf215546Sopenharmony_ci } 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci unsigned layer = 0; 291bf215546Sopenharmony_ci if (setup->layer_slot > 0) { 292bf215546Sopenharmony_ci layer = *(unsigned*)pv[setup->layer_slot]; 293bf215546Sopenharmony_ci layer = MIN2(layer, scene->fb_max_layer); 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci /* Bounding rectangle (in pixels) */ 297bf215546Sopenharmony_ci struct u_rect bbox; 298bf215546Sopenharmony_ci { 299bf215546Sopenharmony_ci /* Yes this is necessary to accurately calculate bounding boxes 300bf215546Sopenharmony_ci * with the two fill-conventions we support. 
GL (normally) ends 301bf215546Sopenharmony_ci * up needing a bottom-left fill convention, which requires 302bf215546Sopenharmony_ci * slightly different rounding. 303bf215546Sopenharmony_ci */ 304bf215546Sopenharmony_ci int adj = (setup->bottom_edge_rule != 0) ? 1 : 0; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci /* Inclusive x0, exclusive x1 */ 307bf215546Sopenharmony_ci bbox.x0 = MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER; 308bf215546Sopenharmony_ci bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER; 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */ 311bf215546Sopenharmony_ci bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER; 312bf215546Sopenharmony_ci bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER; 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { 316bf215546Sopenharmony_ci if (0) debug_printf("no intersection\n"); 317bf215546Sopenharmony_ci LP_COUNT(nr_culled_tris); 318bf215546Sopenharmony_ci return TRUE; 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci int max_szorig = ((bbox.x1 - (bbox.x0 & ~3)) | 322bf215546Sopenharmony_ci (bbox.y1 - (bbox.y0 & ~3))); 323bf215546Sopenharmony_ci boolean use_32bits = max_szorig <= MAX_FIXED_LENGTH32; 324bf215546Sopenharmony_ci#if defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN 325bf215546Sopenharmony_ci boolean pwr8_limit_check = (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 && 326bf215546Sopenharmony_ci (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32; 327bf215546Sopenharmony_ci#endif 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci /* Can safely discard negative regions, but need to keep hold of 330bf215546Sopenharmony_ci * information about when the triangle extends 
past screen 331bf215546Sopenharmony_ci * boundaries. See trimmed_box in lp_setup_bin_triangle(). 332bf215546Sopenharmony_ci */ 333bf215546Sopenharmony_ci bbox.x0 = MAX2(bbox.x0, 0); 334bf215546Sopenharmony_ci bbox.y0 = MAX2(bbox.y0, 0); 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci int nr_planes = 3; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci /* 339bf215546Sopenharmony_ci * Determine how many scissor planes we need, that is drop scissor 340bf215546Sopenharmony_ci * edges if the bounding box of the tri is fully inside that edge. 341bf215546Sopenharmony_ci */ 342bf215546Sopenharmony_ci const struct u_rect *scissor = &setup->draw_regions[viewport_index]; 343bf215546Sopenharmony_ci boolean s_planes[4]; 344bf215546Sopenharmony_ci scissor_planes_needed(s_planes, &bbox, scissor); 345bf215546Sopenharmony_ci nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci unsigned tri_bytes; 348bf215546Sopenharmony_ci const struct lp_setup_variant_key *key = &setup->setup.variant->key; 349bf215546Sopenharmony_ci struct lp_rast_triangle *tri = 350bf215546Sopenharmony_ci lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, &tri_bytes); 351bf215546Sopenharmony_ci if (!tri) 352bf215546Sopenharmony_ci return FALSE; 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci#ifdef DEBUG 355bf215546Sopenharmony_ci tri->v[0][0] = v0[0][0]; 356bf215546Sopenharmony_ci tri->v[1][0] = v1[0][0]; 357bf215546Sopenharmony_ci tri->v[2][0] = v2[0][0]; 358bf215546Sopenharmony_ci tri->v[0][1] = v0[0][1]; 359bf215546Sopenharmony_ci tri->v[1][1] = v1[0][1]; 360bf215546Sopenharmony_ci tri->v[2][1] = v2[0][1]; 361bf215546Sopenharmony_ci#endif 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci LP_COUNT(nr_tris); 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci /* 366bf215546Sopenharmony_ci * Rotate the tri such that v0 is closest to the fb origin. 
367bf215546Sopenharmony_ci * This can give more accurate a0 value (which is at fb origin) 368bf215546Sopenharmony_ci * when calculating the interpolants. 369bf215546Sopenharmony_ci * It can't work when there's flat shading for instance in one 370bf215546Sopenharmony_ci * of the attributes, hence restrict this to just a single attribute 371bf215546Sopenharmony_ci * which is what causes some test failures. 372bf215546Sopenharmony_ci * (This does not address the problem that interpolation may be 373bf215546Sopenharmony_ci * inaccurate if gradients are relatively steep in small tris far 374bf215546Sopenharmony_ci * away from the origin. It does however fix the (silly) wgf11rasterizer 375bf215546Sopenharmony_ci * Interpolator test.) 376bf215546Sopenharmony_ci * XXX This causes problems with mipgen -EmuTexture for not yet really 377bf215546Sopenharmony_ci * understood reasons (if the vertices would be submitted in a different 378bf215546Sopenharmony_ci * order, we'd also generate the same "wrong" results here without 379bf215546Sopenharmony_ci * rotation). In any case, that we generate different values if a prim 380bf215546Sopenharmony_ci * has the vertices rotated but is otherwise the same (which is due to 381bf215546Sopenharmony_ci * numerical issues) is not a nice property. An additional problem by 382bf215546Sopenharmony_ci * swapping the vertices here (which is possibly worse) is that 383bf215546Sopenharmony_ci * the same primitive coming in twice might generate different values 384bf215546Sopenharmony_ci * (in particular for z) due to the swapping potentially not happening 385bf215546Sopenharmony_ci * both times, if the attributes to be interpolated are different. 
For now, 386bf215546Sopenharmony_ci * just restrict this to not get used with dx9 (by checking pixel offset), 387bf215546Sopenharmony_ci * could also restrict it further to only trigger with wgf11Interpolator 388bf215546Sopenharmony_ci * Rasterizer test (the only place which needs it, with always the same 389bf215546Sopenharmony_ci * vertices even). 390bf215546Sopenharmony_ci */ 391bf215546Sopenharmony_ci if ((LP_DEBUG & DEBUG_ACCURATE_A0) && 392bf215546Sopenharmony_ci setup->pixel_offset == 0.5f && 393bf215546Sopenharmony_ci key->num_inputs == 1 && 394bf215546Sopenharmony_ci (key->inputs[0].interp == LP_INTERP_LINEAR || 395bf215546Sopenharmony_ci key->inputs[0].interp == LP_INTERP_PERSPECTIVE)) { 396bf215546Sopenharmony_ci float dist0 = v0[0][0] * v0[0][0] + v0[0][1] * v0[0][1]; 397bf215546Sopenharmony_ci float dist1 = v1[0][0] * v1[0][0] + v1[0][1] * v1[0][1]; 398bf215546Sopenharmony_ci float dist2 = v2[0][0] * v2[0][0] + v2[0][1] * v2[0][1]; 399bf215546Sopenharmony_ci if (dist0 > dist1 && dist1 < dist2) { 400bf215546Sopenharmony_ci const float (*vt)[4]; 401bf215546Sopenharmony_ci int x, y; 402bf215546Sopenharmony_ci vt = v0; 403bf215546Sopenharmony_ci v0 = v1; 404bf215546Sopenharmony_ci v1 = v2; 405bf215546Sopenharmony_ci v2 = vt; 406bf215546Sopenharmony_ci x = position->x[0]; 407bf215546Sopenharmony_ci y = position->y[0]; 408bf215546Sopenharmony_ci position->x[0] = position->x[1]; 409bf215546Sopenharmony_ci position->y[0] = position->y[1]; 410bf215546Sopenharmony_ci position->x[1] = position->x[2]; 411bf215546Sopenharmony_ci position->y[1] = position->y[2]; 412bf215546Sopenharmony_ci position->x[2] = x; 413bf215546Sopenharmony_ci position->y[2] = y; 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci position->dx20 = position->dx01; 416bf215546Sopenharmony_ci position->dy20 = position->dy01; 417bf215546Sopenharmony_ci position->dx01 = position->x[0] - position->x[1]; 418bf215546Sopenharmony_ci position->dy01 = position->y[0] - position->y[1]; 
419bf215546Sopenharmony_ci } else if (dist0 > dist2) { 420bf215546Sopenharmony_ci const float (*vt)[4]; 421bf215546Sopenharmony_ci int x, y; 422bf215546Sopenharmony_ci vt = v0; 423bf215546Sopenharmony_ci v0 = v2; 424bf215546Sopenharmony_ci v2 = v1; 425bf215546Sopenharmony_ci v1 = vt; 426bf215546Sopenharmony_ci x = position->x[0]; 427bf215546Sopenharmony_ci y = position->y[0]; 428bf215546Sopenharmony_ci position->x[0] = position->x[2]; 429bf215546Sopenharmony_ci position->y[0] = position->y[2]; 430bf215546Sopenharmony_ci position->x[2] = position->x[1]; 431bf215546Sopenharmony_ci position->y[2] = position->y[1]; 432bf215546Sopenharmony_ci position->x[1] = x; 433bf215546Sopenharmony_ci position->y[1] = y; 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci position->dx01 = position->dx20; 436bf215546Sopenharmony_ci position->dy01 = position->dy20; 437bf215546Sopenharmony_ci position->dx20 = position->x[2] - position->x[0]; 438bf215546Sopenharmony_ci position->dy20 = position->y[2] - position->y[0]; 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci } 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci /* Setup parameter interpolants: 443bf215546Sopenharmony_ci */ 444bf215546Sopenharmony_ci setup->setup.variant->jit_function(v0, v1, v2, 445bf215546Sopenharmony_ci frontfacing, 446bf215546Sopenharmony_ci GET_A0(&tri->inputs), 447bf215546Sopenharmony_ci GET_DADX(&tri->inputs), 448bf215546Sopenharmony_ci GET_DADY(&tri->inputs), 449bf215546Sopenharmony_ci &setup->setup.variant->key); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci tri->inputs.frontfacing = frontfacing; 452bf215546Sopenharmony_ci tri->inputs.disable = FALSE; 453bf215546Sopenharmony_ci tri->inputs.is_blit = FALSE; 454bf215546Sopenharmony_ci tri->inputs.layer = layer; 455bf215546Sopenharmony_ci tri->inputs.viewport_index = viewport_index; 456bf215546Sopenharmony_ci tri->inputs.view_index = setup->view_index; 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci if (0) 459bf215546Sopenharmony_ci 
lp_dump_setup_coef(&setup->setup.variant->key, 460bf215546Sopenharmony_ci GET_A0(&tri->inputs), 461bf215546Sopenharmony_ci GET_DADX(&tri->inputs), 462bf215546Sopenharmony_ci GET_DADY(&tri->inputs)); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci struct lp_rast_plane *plane = GET_PLANES(tri); 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci#if defined(PIPE_ARCH_SSE) 467bf215546Sopenharmony_ci if (1) { 468bf215546Sopenharmony_ci __m128i vertx, verty; 469bf215546Sopenharmony_ci __m128i shufx, shufy; 470bf215546Sopenharmony_ci __m128i dcdx, dcdy; 471bf215546Sopenharmony_ci __m128i cdx02, cdx13, cdy02, cdy13, c02, c13; 472bf215546Sopenharmony_ci __m128i c01, c23, unused; 473bf215546Sopenharmony_ci __m128i dcdx_neg_mask; 474bf215546Sopenharmony_ci __m128i dcdy_neg_mask; 475bf215546Sopenharmony_ci __m128i dcdx_zero_mask; 476bf215546Sopenharmony_ci __m128i top_left_flag, c_dec; 477bf215546Sopenharmony_ci __m128i eo, p0, p1, p2; 478bf215546Sopenharmony_ci __m128i zero = _mm_setzero_si128(); 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */ 481bf215546Sopenharmony_ci verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */ 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); 484bf215546Sopenharmony_ci shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci dcdx = _mm_sub_epi32(verty, shufy); 487bf215546Sopenharmony_ci dcdy = _mm_sub_epi32(vertx, shufx); 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); 490bf215546Sopenharmony_ci dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); 491bf215546Sopenharmony_ci dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? 
~0 : 0); 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci c_dec = _mm_or_si128(dcdx_neg_mask, 496bf215546Sopenharmony_ci _mm_and_si128(dcdx_zero_mask, 497bf215546Sopenharmony_ci _mm_xor_si128(dcdy_neg_mask, 498bf215546Sopenharmony_ci top_left_flag))); 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci /* 501bf215546Sopenharmony_ci * 64 bit arithmetic. 502bf215546Sopenharmony_ci * Note we need _signed_ mul (_mm_mul_epi32) which we emulate. 503bf215546Sopenharmony_ci */ 504bf215546Sopenharmony_ci cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13); 505bf215546Sopenharmony_ci cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13); 506bf215546Sopenharmony_ci c02 = _mm_sub_epi64(cdx02, cdy02); 507bf215546Sopenharmony_ci c13 = _mm_sub_epi64(cdx13, cdy13); 508bf215546Sopenharmony_ci c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec, 509bf215546Sopenharmony_ci _MM_SHUFFLE(2,2,0,0))); 510bf215546Sopenharmony_ci c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec, 511bf215546Sopenharmony_ci _MM_SHUFFLE(3,3,1,1))); 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci /* 514bf215546Sopenharmony_ci * Useful for very small fbs/tris (or fewer subpixel bits) only: 515bf215546Sopenharmony_ci * c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), 516bf215546Sopenharmony_ci * mm_mullo_epi32(dcdy, verty)); 517bf215546Sopenharmony_ci * 518bf215546Sopenharmony_ci * c = _mm_sub_epi32(c, c_dec); 519bf215546Sopenharmony_ci */ 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci /* Scale up to match c: 522bf215546Sopenharmony_ci */ 523bf215546Sopenharmony_ci dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); 524bf215546Sopenharmony_ci dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci /* 527bf215546Sopenharmony_ci * Calculate trivial reject values: 528bf215546Sopenharmony_ci * Note eo cannot overflow even if dcdx/dcdy would already have 529bf215546Sopenharmony_ci * 31 bits (which they shouldn't have). 
This is because eo 530bf215546Sopenharmony_ci * is never negative (albeit if we rely on that need to be careful...) 531bf215546Sopenharmony_ci */ 532bf215546Sopenharmony_ci eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), 533bf215546Sopenharmony_ci _mm_and_si128(dcdx_neg_mask, dcdx)); 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci /* 538bf215546Sopenharmony_ci * Pointless transpose which gets undone immediately in 539bf215546Sopenharmony_ci * rasterization. 540bf215546Sopenharmony_ci * It is actually difficult to do away with it - would essentially 541bf215546Sopenharmony_ci * need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations 542bf215546Sopenharmony_ci * for this then would need to depend on the number of planes. 543bf215546Sopenharmony_ci * The transpose is quite special here due to c being 64bit... 544bf215546Sopenharmony_ci * The store has to be unaligned (unless we'd make the plane size 545bf215546Sopenharmony_ci * a multiple of 128), and of course storing eo separately... 
546bf215546Sopenharmony_ci */ 547bf215546Sopenharmony_ci c01 = _mm_unpacklo_epi64(c02, c13); 548bf215546Sopenharmony_ci c23 = _mm_unpackhi_epi64(c02, c13); 549bf215546Sopenharmony_ci transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy, 550bf215546Sopenharmony_ci &p0, &p1, &p2, &unused); 551bf215546Sopenharmony_ci _mm_storeu_si128((__m128i *)&plane[0], p0); 552bf215546Sopenharmony_ci plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo); 553bf215546Sopenharmony_ci _mm_storeu_si128((__m128i *)&plane[1], p1); 554bf215546Sopenharmony_ci eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1)); 555bf215546Sopenharmony_ci plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo); 556bf215546Sopenharmony_ci _mm_storeu_si128((__m128i *)&plane[2], p2); 557bf215546Sopenharmony_ci eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2)); 558bf215546Sopenharmony_ci plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo); 559bf215546Sopenharmony_ci } else 560bf215546Sopenharmony_ci#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN 561bf215546Sopenharmony_ci /* 562bf215546Sopenharmony_ci * XXX this code is effectively disabled for all practical purposes, 563bf215546Sopenharmony_ci * as the allowed fb size is tiny if FIXED_ORDER is 8. 
564bf215546Sopenharmony_ci */ 565bf215546Sopenharmony_ci if (setup->fb.width <= MAX_FIXED_LENGTH32 && 566bf215546Sopenharmony_ci setup->fb.height <= MAX_FIXED_LENGTH32 && 567bf215546Sopenharmony_ci pwr8_limit_check) { 568bf215546Sopenharmony_ci unsigned int bottom_edge; 569bf215546Sopenharmony_ci __m128i vertx, verty; 570bf215546Sopenharmony_ci __m128i shufx, shufy; 571bf215546Sopenharmony_ci __m128i dcdx, dcdy, c; 572bf215546Sopenharmony_ci __m128i unused; 573bf215546Sopenharmony_ci __m128i dcdx_neg_mask; 574bf215546Sopenharmony_ci __m128i dcdy_neg_mask; 575bf215546Sopenharmony_ci __m128i dcdx_zero_mask; 576bf215546Sopenharmony_ci __m128i top_left_flag; 577bf215546Sopenharmony_ci __m128i c_inc_mask, c_inc; 578bf215546Sopenharmony_ci __m128i eo, p0, p1, p2; 579bf215546Sopenharmony_ci __m128i_union vshuf_mask; 580bf215546Sopenharmony_ci __m128i zero = vec_splats((unsigned char) 0); 581bf215546Sopenharmony_ci alignas(16) int32_t temp_vec[4]; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci#if UTIL_ARCH_LITTLE_ENDIAN 584bf215546Sopenharmony_ci vshuf_mask.i[0] = 0x07060504; 585bf215546Sopenharmony_ci vshuf_mask.i[1] = 0x0B0A0908; 586bf215546Sopenharmony_ci vshuf_mask.i[2] = 0x03020100; 587bf215546Sopenharmony_ci vshuf_mask.i[3] = 0x0F0E0D0C; 588bf215546Sopenharmony_ci#else 589bf215546Sopenharmony_ci vshuf_mask.i[0] = 0x00010203; 590bf215546Sopenharmony_ci vshuf_mask.i[1] = 0x0C0D0E0F; 591bf215546Sopenharmony_ci vshuf_mask.i[2] = 0x04050607; 592bf215546Sopenharmony_ci vshuf_mask.i[3] = 0x08090A0B; 593bf215546Sopenharmony_ci#endif 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci /* vertex x coords */ 596bf215546Sopenharmony_ci vertx = vec_load_si128((const uint32_t *) position->x); 597bf215546Sopenharmony_ci /* vertex y coords */ 598bf215546Sopenharmony_ci verty = vec_load_si128((const uint32_t *) position->y); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci shufx = vec_perm (vertx, vertx, vshuf_mask.m128i); 601bf215546Sopenharmony_ci shufy = vec_perm 
(verty, verty, vshuf_mask.m128i); 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci dcdx = vec_sub_epi32(verty, shufy); 604bf215546Sopenharmony_ci dcdy = vec_sub_epi32(vertx, shufx); 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci dcdx_neg_mask = vec_srai_epi32(dcdx, 31); 607bf215546Sopenharmony_ci dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero); 608bf215546Sopenharmony_ci dcdy_neg_mask = vec_srai_epi32(dcdy, 31); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0; 611bf215546Sopenharmony_ci top_left_flag = (__m128i) vec_splats(bottom_edge); 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci c_inc_mask = vec_or(dcdx_neg_mask, 614bf215546Sopenharmony_ci vec_and(dcdx_zero_mask, 615bf215546Sopenharmony_ci vec_xor(dcdy_neg_mask, 616bf215546Sopenharmony_ci top_left_flag))); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci c_inc = vec_srli_epi32(c_inc_mask, 31); 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx), 621bf215546Sopenharmony_ci vec_mullo_epi32(dcdy, verty)); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci c = vec_add_epi32(c, c_inc); 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci /* Scale up to match c: 626bf215546Sopenharmony_ci */ 627bf215546Sopenharmony_ci dcdx = vec_slli_epi32(dcdx, FIXED_ORDER); 628bf215546Sopenharmony_ci dcdy = vec_slli_epi32(dcdy, FIXED_ORDER); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci /* Calculate trivial reject values: 631bf215546Sopenharmony_ci */ 632bf215546Sopenharmony_ci eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy), 633bf215546Sopenharmony_ci vec_and(dcdx_neg_mask, dcdx)); 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci /* Pointless transpose which gets undone immediately in 638bf215546Sopenharmony_ci * rasterization: 639bf215546Sopenharmony_ci */ 
640bf215546Sopenharmony_ci transpose4_epi32(&c, &dcdx, &dcdy, &eo, 641bf215546Sopenharmony_ci &p0, &p1, &p2, &unused); 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci#define STORE_PLANE(plane, vec) do { \ 644bf215546Sopenharmony_ci vec_store_si128((uint32_t *)&temp_vec, vec); \ 645bf215546Sopenharmony_ci plane.c = (int64_t)temp_vec[0]; \ 646bf215546Sopenharmony_ci plane.dcdx = temp_vec[1]; \ 647bf215546Sopenharmony_ci plane.dcdy = temp_vec[2]; \ 648bf215546Sopenharmony_ci plane.eo = temp_vec[3]; \ 649bf215546Sopenharmony_ci } while(0) 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci STORE_PLANE(plane[0], p0); 652bf215546Sopenharmony_ci STORE_PLANE(plane[1], p1); 653bf215546Sopenharmony_ci STORE_PLANE(plane[2], p2); 654bf215546Sopenharmony_ci#undef STORE_PLANE 655bf215546Sopenharmony_ci } else 656bf215546Sopenharmony_ci#endif 657bf215546Sopenharmony_ci { 658bf215546Sopenharmony_ci plane[0].dcdy = position->dx01; 659bf215546Sopenharmony_ci plane[1].dcdy = position->x[1] - position->x[2]; 660bf215546Sopenharmony_ci plane[2].dcdy = position->dx20; 661bf215546Sopenharmony_ci plane[0].dcdx = position->dy01; 662bf215546Sopenharmony_ci plane[1].dcdx = position->y[1] - position->y[2]; 663bf215546Sopenharmony_ci plane[2].dcdx = position->dy20; 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) { 666bf215546Sopenharmony_ci /* half-edge constants, will be iterated over the whole render 667bf215546Sopenharmony_ci * target. 668bf215546Sopenharmony_ci */ 669bf215546Sopenharmony_ci plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) - 670bf215546Sopenharmony_ci IMUL64(plane[i].dcdy, position->y[i]); 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci /* correct for top-left vs. bottom-left fill convention. 
673bf215546Sopenharmony_ci */ 674bf215546Sopenharmony_ci if (plane[i].dcdx < 0) { 675bf215546Sopenharmony_ci /* both fill conventions want this - adjust for left edges */ 676bf215546Sopenharmony_ci plane[i].c++; 677bf215546Sopenharmony_ci } 678bf215546Sopenharmony_ci else if (plane[i].dcdx == 0) { 679bf215546Sopenharmony_ci if (setup->bottom_edge_rule == 0) { 680bf215546Sopenharmony_ci /* correct for top-left fill convention: 681bf215546Sopenharmony_ci */ 682bf215546Sopenharmony_ci if (plane[i].dcdy > 0) 683bf215546Sopenharmony_ci plane[i].c++; 684bf215546Sopenharmony_ci } else { 685bf215546Sopenharmony_ci /* correct for bottom-left fill convention: 686bf215546Sopenharmony_ci */ 687bf215546Sopenharmony_ci if (plane[i].dcdy < 0) 688bf215546Sopenharmony_ci plane[i].c++; 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci } 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci /* Scale up to match c: 693bf215546Sopenharmony_ci */ 694bf215546Sopenharmony_ci assert((plane[i].dcdx << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdx); 695bf215546Sopenharmony_ci assert((plane[i].dcdy << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdy); 696bf215546Sopenharmony_ci plane[i].dcdx <<= FIXED_ORDER; 697bf215546Sopenharmony_ci plane[i].dcdy <<= FIXED_ORDER; 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci /* find trivial reject offsets for each edge for a single-pixel 700bf215546Sopenharmony_ci * sized block. These will be scaled up at each recursive level to 701bf215546Sopenharmony_ci * match the active blocksize. Scaling in this way works best if 702bf215546Sopenharmony_ci * the blocks are square. 
703bf215546Sopenharmony_ci */ 704bf215546Sopenharmony_ci plane[i].eo = 0; 705bf215546Sopenharmony_ci if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; 706bf215546Sopenharmony_ci if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; 707bf215546Sopenharmony_ci } 708bf215546Sopenharmony_ci } 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci if (0) { 711bf215546Sopenharmony_ci debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n", 712bf215546Sopenharmony_ci plane[0].c, 713bf215546Sopenharmony_ci plane[0].dcdx, 714bf215546Sopenharmony_ci plane[0].dcdy, 715bf215546Sopenharmony_ci plane[0].eo); 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n", 718bf215546Sopenharmony_ci plane[1].c, 719bf215546Sopenharmony_ci plane[1].dcdx, 720bf215546Sopenharmony_ci plane[1].dcdy, 721bf215546Sopenharmony_ci plane[1].eo); 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n", 724bf215546Sopenharmony_ci plane[2].c, 725bf215546Sopenharmony_ci plane[2].dcdx, 726bf215546Sopenharmony_ci plane[2].dcdy, 727bf215546Sopenharmony_ci plane[2].eo); 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci if (nr_planes > 3) { 731bf215546Sopenharmony_ci lp_setup_add_scissor_planes(scissor, &plane[3], s_planes, setup->multisample); 732bf215546Sopenharmony_ci } 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci return lp_setup_bin_triangle(setup, tri, use_32bits, 735bf215546Sopenharmony_ci check_opaque(setup, v0, v1, v2), 736bf215546Sopenharmony_ci &bbox, nr_planes, viewport_index); 737bf215546Sopenharmony_ci} 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci/* 740bf215546Sopenharmony_ci * Round to nearest less or equal power of two of the input. 741bf215546Sopenharmony_ci * 742bf215546Sopenharmony_ci * Undefined if no bit set exists, so code should check against 0 first. 
743bf215546Sopenharmony_ci */ 744bf215546Sopenharmony_cistatic inline uint32_t 745bf215546Sopenharmony_cifloor_pot(uint32_t n) 746bf215546Sopenharmony_ci{ 747bf215546Sopenharmony_ci#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) 748bf215546Sopenharmony_ci if (n == 0) 749bf215546Sopenharmony_ci return 0; 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci __asm__("bsr %1,%0" 752bf215546Sopenharmony_ci : "=r" (n) 753bf215546Sopenharmony_ci : "rm" (n) 754bf215546Sopenharmony_ci : "cc"); 755bf215546Sopenharmony_ci return 1 << n; 756bf215546Sopenharmony_ci#else 757bf215546Sopenharmony_ci n |= (n >> 1); 758bf215546Sopenharmony_ci n |= (n >> 2); 759bf215546Sopenharmony_ci n |= (n >> 4); 760bf215546Sopenharmony_ci n |= (n >> 8); 761bf215546Sopenharmony_ci n |= (n >> 16); 762bf215546Sopenharmony_ci return n - (n >> 1); 763bf215546Sopenharmony_ci#endif 764bf215546Sopenharmony_ci} 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ciboolean 768bf215546Sopenharmony_cilp_setup_bin_triangle(struct lp_setup_context *setup, 769bf215546Sopenharmony_ci struct lp_rast_triangle *tri, 770bf215546Sopenharmony_ci boolean use_32bits, 771bf215546Sopenharmony_ci boolean opaque, 772bf215546Sopenharmony_ci const struct u_rect *bbox, 773bf215546Sopenharmony_ci int nr_planes, 774bf215546Sopenharmony_ci unsigned viewport_index) 775bf215546Sopenharmony_ci{ 776bf215546Sopenharmony_ci struct lp_scene *scene = setup->scene; 777bf215546Sopenharmony_ci unsigned cmd; 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci /* What is the largest power-of-two boundary this triangle crosses: 780bf215546Sopenharmony_ci */ 781bf215546Sopenharmony_ci const int dx = floor_pot((bbox->x0 ^ bbox->x1) | 782bf215546Sopenharmony_ci (bbox->y0 ^ bbox->y1)); 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci /* The largest dimension of the rasterized area of the triangle 785bf215546Sopenharmony_ci * (aligned to a 4x4 grid), rounded down to the nearest 
power of two: 786bf215546Sopenharmony_ci */ 787bf215546Sopenharmony_ci const int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) | 788bf215546Sopenharmony_ci (bbox->y1 - (bbox->y0 & ~3))); 789bf215546Sopenharmony_ci const int sz = floor_pot(max_sz); 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci /* 792bf215546Sopenharmony_ci * NOTE: It is important to use the original bounding box 793bf215546Sopenharmony_ci * which might contain negative values here, because if the 794bf215546Sopenharmony_ci * plane math may overflow or not with the 32bit rasterization 795bf215546Sopenharmony_ci * functions depends on the original extent of the triangle. 796bf215546Sopenharmony_ci */ 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci /* Now apply scissor, etc to the bounding box. Could do this 799bf215546Sopenharmony_ci * earlier, but it confuses the logic for tri-16 and would force 800bf215546Sopenharmony_ci * the rasterizer to also respect scissor, etc, just for the rare 801bf215546Sopenharmony_ci * cases where a small triangle extends beyond the scissor. 
802bf215546Sopenharmony_ci */ 803bf215546Sopenharmony_ci struct u_rect trimmed_box = *bbox; 804bf215546Sopenharmony_ci u_rect_find_intersection(&setup->draw_regions[viewport_index], 805bf215546Sopenharmony_ci &trimmed_box); 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci /* Determine which tile(s) intersect the triangle's bounding box 808bf215546Sopenharmony_ci */ 809bf215546Sopenharmony_ci if (dx < TILE_SIZE) { 810bf215546Sopenharmony_ci const int ix0 = bbox->x0 / TILE_SIZE; 811bf215546Sopenharmony_ci const int iy0 = bbox->y0 / TILE_SIZE; 812bf215546Sopenharmony_ci unsigned px = bbox->x0 & 63 & ~3; 813bf215546Sopenharmony_ci unsigned py = bbox->y0 & 63 & ~3; 814bf215546Sopenharmony_ci 815bf215546Sopenharmony_ci assert(iy0 == bbox->y1 / TILE_SIZE && 816bf215546Sopenharmony_ci ix0 == bbox->x1 / TILE_SIZE); 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci if (nr_planes == 3) { 819bf215546Sopenharmony_ci if (sz < 4) { 820bf215546Sopenharmony_ci /* Triangle is contained in a single 4x4 stamp: 821bf215546Sopenharmony_ci */ 822bf215546Sopenharmony_ci assert(px + 4 <= TILE_SIZE); 823bf215546Sopenharmony_ci assert(py + 4 <= TILE_SIZE); 824bf215546Sopenharmony_ci if (setup->multisample) 825bf215546Sopenharmony_ci cmd = LP_RAST_OP_MS_TRIANGLE_3_4; 826bf215546Sopenharmony_ci else 827bf215546Sopenharmony_ci cmd = use_32bits ? 
LP_RAST_OP_TRIANGLE_32_3_4 : LP_RAST_OP_TRIANGLE_3_4; 828bf215546Sopenharmony_ci return lp_scene_bin_cmd_with_state(scene, ix0, iy0, 829bf215546Sopenharmony_ci setup->fs.stored, cmd, 830bf215546Sopenharmony_ci lp_rast_arg_triangle_contained(tri, px, py)); 831bf215546Sopenharmony_ci } 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci if (sz < 16) { 834bf215546Sopenharmony_ci /* Triangle is contained in a single 16x16 block: 835bf215546Sopenharmony_ci */ 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci /* 838bf215546Sopenharmony_ci * The 16x16 block is only 4x4 aligned, and can exceed the tile 839bf215546Sopenharmony_ci * dimensions if the triangle is 16 pixels in one dimension but 4 840bf215546Sopenharmony_ci * in the other. So budge the 16x16 back inside the tile. 841bf215546Sopenharmony_ci */ 842bf215546Sopenharmony_ci px = MIN2(px, TILE_SIZE - 16); 843bf215546Sopenharmony_ci py = MIN2(py, TILE_SIZE - 16); 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci assert(px + 16 <= TILE_SIZE); 846bf215546Sopenharmony_ci assert(py + 16 <= TILE_SIZE); 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci if (setup->multisample) 849bf215546Sopenharmony_ci cmd = LP_RAST_OP_MS_TRIANGLE_3_16; 850bf215546Sopenharmony_ci else 851bf215546Sopenharmony_ci cmd = use_32bits ? 
LP_RAST_OP_TRIANGLE_32_3_16 : LP_RAST_OP_TRIANGLE_3_16; 852bf215546Sopenharmony_ci return lp_scene_bin_cmd_with_state(scene, ix0, iy0, 853bf215546Sopenharmony_ci setup->fs.stored, cmd, 854bf215546Sopenharmony_ci lp_rast_arg_triangle_contained(tri, px, py)); 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci } else if (nr_planes == 4 && sz < 16) { 857bf215546Sopenharmony_ci px = MIN2(px, TILE_SIZE - 16); 858bf215546Sopenharmony_ci py = MIN2(py, TILE_SIZE - 16); 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci assert(px + 16 <= TILE_SIZE); 861bf215546Sopenharmony_ci assert(py + 16 <= TILE_SIZE); 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_ci if (setup->multisample) 864bf215546Sopenharmony_ci cmd = LP_RAST_OP_MS_TRIANGLE_4_16; 865bf215546Sopenharmony_ci else 866bf215546Sopenharmony_ci cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_4_16 : LP_RAST_OP_TRIANGLE_4_16; 867bf215546Sopenharmony_ci return lp_scene_bin_cmd_with_state(scene, ix0, iy0, 868bf215546Sopenharmony_ci setup->fs.stored, cmd, 869bf215546Sopenharmony_ci lp_rast_arg_triangle_contained(tri, px, py)); 870bf215546Sopenharmony_ci } 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci /* Triangle is contained in a single tile: 873bf215546Sopenharmony_ci */ 874bf215546Sopenharmony_ci if (setup->multisample) 875bf215546Sopenharmony_ci cmd = lp_rast_ms_tri_tab[nr_planes]; 876bf215546Sopenharmony_ci else 877bf215546Sopenharmony_ci cmd = use_32bits ? 
lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes]; 878bf215546Sopenharmony_ci return lp_scene_bin_cmd_with_state(scene, ix0, iy0, setup->fs.stored, cmd, 879bf215546Sopenharmony_ci lp_rast_arg_triangle(tri, (1<<nr_planes)-1)); 880bf215546Sopenharmony_ci } else { 881bf215546Sopenharmony_ci struct lp_rast_plane *plane = GET_PLANES(tri); 882bf215546Sopenharmony_ci int64_t c[MAX_PLANES]; 883bf215546Sopenharmony_ci int64_t ei[MAX_PLANES]; 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci int64_t eo[MAX_PLANES]; 886bf215546Sopenharmony_ci int64_t xstep[MAX_PLANES]; 887bf215546Sopenharmony_ci int64_t ystep[MAX_PLANES]; 888bf215546Sopenharmony_ci int x, y; 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci const int ix0 = trimmed_box.x0 / TILE_SIZE; 891bf215546Sopenharmony_ci const int iy0 = trimmed_box.y0 / TILE_SIZE; 892bf215546Sopenharmony_ci const int ix1 = trimmed_box.x1 / TILE_SIZE; 893bf215546Sopenharmony_ci const int iy1 = trimmed_box.y1 / TILE_SIZE; 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci for (int i = 0; i < nr_planes; i++) { 896bf215546Sopenharmony_ci c[i] = (plane[i].c + 897bf215546Sopenharmony_ci IMUL64(plane[i].dcdy, iy0) * TILE_SIZE - 898bf215546Sopenharmony_ci IMUL64(plane[i].dcdx, ix0) * TILE_SIZE); 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci ei[i] = (plane[i].dcdy - 901bf215546Sopenharmony_ci plane[i].dcdx - 902bf215546Sopenharmony_ci (int64_t)plane[i].eo) << TILE_ORDER; 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci eo[i] = (int64_t)plane[i].eo << TILE_ORDER; 905bf215546Sopenharmony_ci xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER); 906bf215546Sopenharmony_ci ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER; 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci tri->inputs.is_blit = lp_setup_is_blit(setup, &tri->inputs); 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci /* Test tile-sized blocks against the triangle. 
912bf215546Sopenharmony_ci * Discard blocks fully outside the tri. If the block is fully 913bf215546Sopenharmony_ci * contained inside the tri, bin an lp_rast_shade_tile command. 914bf215546Sopenharmony_ci * Else, bin a lp_rast_triangle command. 915bf215546Sopenharmony_ci */ 916bf215546Sopenharmony_ci for (y = iy0; y <= iy1; y++) { 917bf215546Sopenharmony_ci boolean in = FALSE; /* are we inside the triangle? */ 918bf215546Sopenharmony_ci int64_t cx[MAX_PLANES]; 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci for (int i = 0; i < nr_planes; i++) 921bf215546Sopenharmony_ci cx[i] = c[i]; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci for (x = ix0; x <= ix1; x++) { 924bf215546Sopenharmony_ci int out = 0; 925bf215546Sopenharmony_ci int partial = 0; 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci for (int i = 0; i < nr_planes; i++) { 928bf215546Sopenharmony_ci int64_t planeout = cx[i] + eo[i]; 929bf215546Sopenharmony_ci int64_t planepartial = cx[i] + ei[i] - 1; 930bf215546Sopenharmony_ci out |= (int) (planeout >> 63); 931bf215546Sopenharmony_ci partial |= ((int) (planepartial >> 63)) & (1<<i); 932bf215546Sopenharmony_ci } 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci if (out) { 935bf215546Sopenharmony_ci /* do nothing */ 936bf215546Sopenharmony_ci if (in) 937bf215546Sopenharmony_ci break; /* exiting triangle, all done with this row */ 938bf215546Sopenharmony_ci LP_COUNT(nr_empty_64); 939bf215546Sopenharmony_ci } else if (partial) { 940bf215546Sopenharmony_ci /* Not trivially accepted by at least one plane - 941bf215546Sopenharmony_ci * rasterize/shade partial tile 942bf215546Sopenharmony_ci */ 943bf215546Sopenharmony_ci int count = util_bitcount(partial); 944bf215546Sopenharmony_ci in = TRUE; 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci if (setup->multisample) 947bf215546Sopenharmony_ci cmd = lp_rast_ms_tri_tab[count]; 948bf215546Sopenharmony_ci else 949bf215546Sopenharmony_ci cmd = use_32bits ? 
lp_rast_32_tri_tab[count] : lp_rast_tri_tab[count]; 950bf215546Sopenharmony_ci if (!lp_scene_bin_cmd_with_state(scene, x, y, 951bf215546Sopenharmony_ci setup->fs.stored, cmd, 952bf215546Sopenharmony_ci lp_rast_arg_triangle(tri, partial))) 953bf215546Sopenharmony_ci goto fail; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci LP_COUNT(nr_partially_covered_64); 956bf215546Sopenharmony_ci } else { 957bf215546Sopenharmony_ci /* triangle covers the whole tile- shade whole tile */ 958bf215546Sopenharmony_ci LP_COUNT(nr_fully_covered_64); 959bf215546Sopenharmony_ci in = TRUE; 960bf215546Sopenharmony_ci if (!lp_setup_whole_tile(setup, &tri->inputs, x, y, opaque)) 961bf215546Sopenharmony_ci goto fail; 962bf215546Sopenharmony_ci } 963bf215546Sopenharmony_ci 964bf215546Sopenharmony_ci /* Iterate cx values across the region: */ 965bf215546Sopenharmony_ci for (int i = 0; i < nr_planes; i++) 966bf215546Sopenharmony_ci cx[i] += xstep[i]; 967bf215546Sopenharmony_ci } 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci /* Iterate c values down the region: */ 970bf215546Sopenharmony_ci for (int i = 0; i < nr_planes; i++) 971bf215546Sopenharmony_ci c[i] += ystep[i]; 972bf215546Sopenharmony_ci } 973bf215546Sopenharmony_ci } 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci return TRUE; 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_cifail: 978bf215546Sopenharmony_ci /* Need to disable any partially binned triangle. This is easier 979bf215546Sopenharmony_ci * than trying to locate all the triangle, shade-tile, etc, 980bf215546Sopenharmony_ci * commands which may have been binned. 981bf215546Sopenharmony_ci */ 982bf215546Sopenharmony_ci tri->inputs.disable = TRUE; 983bf215546Sopenharmony_ci return FALSE; 984bf215546Sopenharmony_ci} 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_ci/** 988bf215546Sopenharmony_ci * Try to draw the triangle, restart the scene on failure. 
989bf215546Sopenharmony_ci */ 990bf215546Sopenharmony_cistatic inline void 991bf215546Sopenharmony_ciretry_triangle_ccw(struct lp_setup_context *setup, 992bf215546Sopenharmony_ci struct fixed_position *position, 993bf215546Sopenharmony_ci const float (*v0)[4], 994bf215546Sopenharmony_ci const float (*v1)[4], 995bf215546Sopenharmony_ci const float (*v2)[4], 996bf215546Sopenharmony_ci boolean front) 997bf215546Sopenharmony_ci{ 998bf215546Sopenharmony_ci if (!do_triangle_ccw(setup, position, v0, v1, v2, front)) { 999bf215546Sopenharmony_ci if (!lp_setup_flush_and_restart(setup)) 1000bf215546Sopenharmony_ci return; 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci if (!do_triangle_ccw(setup, position, v0, v1, v2, front)) 1003bf215546Sopenharmony_ci return; 1004bf215546Sopenharmony_ci } 1005bf215546Sopenharmony_ci} 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci/** 1009bf215546Sopenharmony_ci * Calculate fixed position data for a triangle 1010bf215546Sopenharmony_ci * It is unfortunate we need to do that here (as we need area 1011bf215546Sopenharmony_ci * calculated in fixed point), as there's quite some code duplication 1012bf215546Sopenharmony_ci * to what is done in the jit setup prog. 1013bf215546Sopenharmony_ci */ 1014bf215546Sopenharmony_cistatic inline int8_t 1015bf215546Sopenharmony_cicalc_fixed_position(struct lp_setup_context *setup, 1016bf215546Sopenharmony_ci struct fixed_position* position, 1017bf215546Sopenharmony_ci const float (*v0)[4], 1018bf215546Sopenharmony_ci const float (*v1)[4], 1019bf215546Sopenharmony_ci const float (*v2)[4]) 1020bf215546Sopenharmony_ci{ 1021bf215546Sopenharmony_ci float pixel_offset = setup->multisample ? 
0.0 : setup->pixel_offset; 1022bf215546Sopenharmony_ci /* 1023bf215546Sopenharmony_ci * The rounding may not be quite the same with PIPE_ARCH_SSE 1024bf215546Sopenharmony_ci * (util_iround right now only does nearest/even on x87, 1025bf215546Sopenharmony_ci * otherwise nearest/away-from-zero). 1026bf215546Sopenharmony_ci * Both should be acceptable, I think. 1027bf215546Sopenharmony_ci */ 1028bf215546Sopenharmony_ci#if defined(PIPE_ARCH_SSE) 1029bf215546Sopenharmony_ci __m128 v0r, v1r; 1030bf215546Sopenharmony_ci __m128 vxy0xy2, vxy1xy0; 1031bf215546Sopenharmony_ci __m128i vxy0xy2i, vxy1xy0i; 1032bf215546Sopenharmony_ci __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120; 1033bf215546Sopenharmony_ci __m128 pix_offset = _mm_set1_ps(pixel_offset); 1034bf215546Sopenharmony_ci __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE); 1035bf215546Sopenharmony_ci v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0])); 1036bf215546Sopenharmony_ci vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]); 1037bf215546Sopenharmony_ci v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0])); 1038bf215546Sopenharmony_ci vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2); 1039bf215546Sopenharmony_ci vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset); 1040bf215546Sopenharmony_ci vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset); 1041bf215546Sopenharmony_ci vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one); 1042bf215546Sopenharmony_ci vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one); 1043bf215546Sopenharmony_ci vxy0xy2i = _mm_cvtps_epi32(vxy0xy2); 1044bf215546Sopenharmony_ci vxy1xy0i = _mm_cvtps_epi32(vxy1xy0); 1045bf215546Sopenharmony_ci dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i); 1046bf215546Sopenharmony_ci _mm_store_si128((__m128i *)&position->dx01, dxdy0120); 1047bf215546Sopenharmony_ci /* 1048bf215546Sopenharmony_ci * For the mul, would need some more shuffles, plus emulation 1049bf215546Sopenharmony_ci * for the signed mul (without sse41), so don't bother. 
1050bf215546Sopenharmony_ci */ 1051bf215546Sopenharmony_ci x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0)); 1052bf215546Sopenharmony_ci x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0)); 1053bf215546Sopenharmony_ci x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0); 1054bf215546Sopenharmony_ci y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0); 1055bf215546Sopenharmony_ci _mm_store_si128((__m128i *)&position->x[0], x0120); 1056bf215546Sopenharmony_ci _mm_store_si128((__m128i *)&position->y[0], y0120); 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci#else 1059bf215546Sopenharmony_ci position->x[0] = subpixel_snap(v0[0][0] - pixel_offset); 1060bf215546Sopenharmony_ci position->x[1] = subpixel_snap(v1[0][0] - pixel_offset); 1061bf215546Sopenharmony_ci position->x[2] = subpixel_snap(v2[0][0] - pixel_offset); 1062bf215546Sopenharmony_ci position->x[3] = 0; // should be unused 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci position->y[0] = subpixel_snap(v0[0][1] - pixel_offset); 1065bf215546Sopenharmony_ci position->y[1] = subpixel_snap(v1[0][1] - pixel_offset); 1066bf215546Sopenharmony_ci position->y[2] = subpixel_snap(v2[0][1] - pixel_offset); 1067bf215546Sopenharmony_ci position->y[3] = 0; // should be unused 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci position->dx01 = position->x[0] - position->x[1]; 1070bf215546Sopenharmony_ci position->dy01 = position->y[0] - position->y[1]; 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci position->dx20 = position->x[2] - position->x[0]; 1073bf215546Sopenharmony_ci position->dy20 = position->y[2] - position->y[0]; 1074bf215546Sopenharmony_ci#endif 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci uint64_t area = IMUL64(position->dx01, position->dy20) - 1077bf215546Sopenharmony_ci IMUL64(position->dx20, position->dy01); 1078bf215546Sopenharmony_ci return area == 0 ? 0 : (area & (1ULL << 63)) ? 
-1 : 1; 1079bf215546Sopenharmony_ci} 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci/** 1083bf215546Sopenharmony_ci * Rotate a triangle, flipping its clockwise direction, 1084bf215546Sopenharmony_ci * Swaps values for xy[0] and xy[1] 1085bf215546Sopenharmony_ci */ 1086bf215546Sopenharmony_cistatic inline void 1087bf215546Sopenharmony_cirotate_fixed_position_01(struct fixed_position* position) 1088bf215546Sopenharmony_ci{ 1089bf215546Sopenharmony_ci int x = position->x[1]; 1090bf215546Sopenharmony_ci int y = position->y[1]; 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_ci position->x[1] = position->x[0]; 1093bf215546Sopenharmony_ci position->y[1] = position->y[0]; 1094bf215546Sopenharmony_ci position->x[0] = x; 1095bf215546Sopenharmony_ci position->y[0] = y; 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_ci position->dx01 = -position->dx01; 1098bf215546Sopenharmony_ci position->dy01 = -position->dy01; 1099bf215546Sopenharmony_ci position->dx20 = position->x[2] - position->x[0]; 1100bf215546Sopenharmony_ci position->dy20 = position->y[2] - position->y[0]; 1101bf215546Sopenharmony_ci} 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_ci/** 1105bf215546Sopenharmony_ci * Rotate a triangle, flipping its clockwise direction, 1106bf215546Sopenharmony_ci * Swaps values for xy[1] and xy[2] 1107bf215546Sopenharmony_ci */ 1108bf215546Sopenharmony_cistatic inline void 1109bf215546Sopenharmony_cirotate_fixed_position_12(struct fixed_position* position) 1110bf215546Sopenharmony_ci{ 1111bf215546Sopenharmony_ci int x = position->x[2]; 1112bf215546Sopenharmony_ci int y = position->y[2]; 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci position->x[2] = position->x[1]; 1115bf215546Sopenharmony_ci position->y[2] = position->y[1]; 1116bf215546Sopenharmony_ci position->x[1] = x; 1117bf215546Sopenharmony_ci position->y[1] = y; 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci x = position->dx01; 
1120bf215546Sopenharmony_ci y = position->dy01; 1121bf215546Sopenharmony_ci position->dx01 = -position->dx20; 1122bf215546Sopenharmony_ci position->dy01 = -position->dy20; 1123bf215546Sopenharmony_ci position->dx20 = -x; 1124bf215546Sopenharmony_ci position->dy20 = -y; 1125bf215546Sopenharmony_ci} 1126bf215546Sopenharmony_ci 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci/** 1129bf215546Sopenharmony_ci * Draw triangle if it's CW, cull otherwise. 1130bf215546Sopenharmony_ci */ 1131bf215546Sopenharmony_cistatic void 1132bf215546Sopenharmony_citriangle_cw(struct lp_setup_context *setup, 1133bf215546Sopenharmony_ci const float (*v0)[4], 1134bf215546Sopenharmony_ci const float (*v1)[4], 1135bf215546Sopenharmony_ci const float (*v2)[4]) 1136bf215546Sopenharmony_ci{ 1137bf215546Sopenharmony_ci alignas(16) struct fixed_position position; 1138bf215546Sopenharmony_ci struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci if (lp_context->active_statistics_queries) { 1141bf215546Sopenharmony_ci lp_context->pipeline_statistics.c_primitives++; 1142bf215546Sopenharmony_ci } 1143bf215546Sopenharmony_ci 1144bf215546Sopenharmony_ci int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2); 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci if (area_sign < 0) { 1147bf215546Sopenharmony_ci if (setup->flatshade_first) { 1148bf215546Sopenharmony_ci rotate_fixed_position_12(&position); 1149bf215546Sopenharmony_ci retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface); 1150bf215546Sopenharmony_ci } else { 1151bf215546Sopenharmony_ci rotate_fixed_position_01(&position); 1152bf215546Sopenharmony_ci retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface); 1153bf215546Sopenharmony_ci } 1154bf215546Sopenharmony_ci } 1155bf215546Sopenharmony_ci} 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_cistatic void 
1159bf215546Sopenharmony_citriangle_ccw(struct lp_setup_context *setup, 1160bf215546Sopenharmony_ci const float (*v0)[4], 1161bf215546Sopenharmony_ci const float (*v1)[4], 1162bf215546Sopenharmony_ci const float (*v2)[4]) 1163bf215546Sopenharmony_ci{ 1164bf215546Sopenharmony_ci alignas(16) struct fixed_position position; 1165bf215546Sopenharmony_ci struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci if (lp_context->active_statistics_queries) { 1168bf215546Sopenharmony_ci lp_context->pipeline_statistics.c_primitives++; 1169bf215546Sopenharmony_ci } 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2); 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci if (area_sign > 0) 1174bf215546Sopenharmony_ci retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface); 1175bf215546Sopenharmony_ci} 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci/** 1179bf215546Sopenharmony_ci * Draw triangle whether it's CW or CCW. 
1180bf215546Sopenharmony_ci */ 1181bf215546Sopenharmony_cistatic void 1182bf215546Sopenharmony_citriangle_both(struct lp_setup_context *setup, 1183bf215546Sopenharmony_ci const float (*v0)[4], 1184bf215546Sopenharmony_ci const float (*v1)[4], 1185bf215546Sopenharmony_ci const float (*v2)[4]) 1186bf215546Sopenharmony_ci{ 1187bf215546Sopenharmony_ci alignas(16) struct fixed_position position; 1188bf215546Sopenharmony_ci struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_ci if (lp_context->active_statistics_queries) { 1191bf215546Sopenharmony_ci lp_context->pipeline_statistics.c_primitives++; 1192bf215546Sopenharmony_ci } 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2); 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_ci if (0) { 1197bf215546Sopenharmony_ci assert(!util_is_inf_or_nan(v0[0][0])); 1198bf215546Sopenharmony_ci assert(!util_is_inf_or_nan(v0[0][1])); 1199bf215546Sopenharmony_ci assert(!util_is_inf_or_nan(v1[0][0])); 1200bf215546Sopenharmony_ci assert(!util_is_inf_or_nan(v1[0][1])); 1201bf215546Sopenharmony_ci assert(!util_is_inf_or_nan(v2[0][0])); 1202bf215546Sopenharmony_ci assert(!util_is_inf_or_nan(v2[0][1])); 1203bf215546Sopenharmony_ci } 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci if (area_sign > 0) { 1206bf215546Sopenharmony_ci retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface); 1207bf215546Sopenharmony_ci } else if (area_sign < 0) { 1208bf215546Sopenharmony_ci if (setup->flatshade_first) { 1209bf215546Sopenharmony_ci rotate_fixed_position_12(&position); 1210bf215546Sopenharmony_ci retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface); 1211bf215546Sopenharmony_ci } else { 1212bf215546Sopenharmony_ci rotate_fixed_position_01(&position); 1213bf215546Sopenharmony_ci retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface); 
1214bf215546Sopenharmony_ci } 1215bf215546Sopenharmony_ci } 1216bf215546Sopenharmony_ci} 1217bf215546Sopenharmony_ci 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_cistatic void 1220bf215546Sopenharmony_citriangle_noop(struct lp_setup_context *setup, 1221bf215546Sopenharmony_ci const float (*v0)[4], 1222bf215546Sopenharmony_ci const float (*v1)[4], 1223bf215546Sopenharmony_ci const float (*v2)[4]) 1224bf215546Sopenharmony_ci{ 1225bf215546Sopenharmony_ci} 1226bf215546Sopenharmony_ci 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_civoid 1229bf215546Sopenharmony_cilp_setup_choose_triangle(struct lp_setup_context *setup) 1230bf215546Sopenharmony_ci{ 1231bf215546Sopenharmony_ci if (setup->rasterizer_discard) { 1232bf215546Sopenharmony_ci setup->triangle = triangle_noop; 1233bf215546Sopenharmony_ci return; 1234bf215546Sopenharmony_ci } 1235bf215546Sopenharmony_ci switch (setup->cullmode) { 1236bf215546Sopenharmony_ci case PIPE_FACE_NONE: 1237bf215546Sopenharmony_ci setup->triangle = triangle_both; 1238bf215546Sopenharmony_ci break; 1239bf215546Sopenharmony_ci case PIPE_FACE_BACK: 1240bf215546Sopenharmony_ci setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw; 1241bf215546Sopenharmony_ci break; 1242bf215546Sopenharmony_ci case PIPE_FACE_FRONT: 1243bf215546Sopenharmony_ci setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw; 1244bf215546Sopenharmony_ci break; 1245bf215546Sopenharmony_ci default: 1246bf215546Sopenharmony_ci setup->triangle = triangle_noop; 1247bf215546Sopenharmony_ci break; 1248bf215546Sopenharmony_ci } 1249bf215546Sopenharmony_ci} 1250