1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (c) 2014 Scott Mansell 3bf215546Sopenharmony_ci * Copyright © 2014 Broadcom 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22bf215546Sopenharmony_ci * IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "util/u_blitter.h" 26bf215546Sopenharmony_ci#include "util/u_draw.h" 27bf215546Sopenharmony_ci#include "util/u_prim.h" 28bf215546Sopenharmony_ci#include "util/format/u_format.h" 29bf215546Sopenharmony_ci#include "util/u_pack_color.h" 30bf215546Sopenharmony_ci#include "util/u_split_draw.h" 31bf215546Sopenharmony_ci#include "util/u_upload_mgr.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#include "vc4_context.h" 34bf215546Sopenharmony_ci#include "vc4_resource.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#define VC4_HW_2116_COUNT 0x1ef0 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistatic void 39bf215546Sopenharmony_civc4_get_draw_cl_space(struct vc4_job *job, int vert_count) 40bf215546Sopenharmony_ci{ 41bf215546Sopenharmony_ci /* The SW-5891 workaround may cause us to emit multiple shader recs 42bf215546Sopenharmony_ci * and draw packets. 43bf215546Sopenharmony_ci */ 44bf215546Sopenharmony_ci int num_draws = DIV_ROUND_UP(vert_count, 65535 - 2) + 1; 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci /* Binner gets our packet state -- vc4_emit.c contents, 47bf215546Sopenharmony_ci * and the primitive itself. 48bf215546Sopenharmony_ci */ 49bf215546Sopenharmony_ci cl_ensure_space(&job->bcl, 50bf215546Sopenharmony_ci 256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE + 51bf215546Sopenharmony_ci VC4_PACKET_GL_SHADER_STATE_SIZE) * num_draws); 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* Nothing for rcl -- that's covered by vc4_context.c */ 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci /* shader_rec gets up to 12 dwords of reloc handles plus a maximally 56bf215546Sopenharmony_ci * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of 57bf215546Sopenharmony_ci * vattr stride). 58bf215546Sopenharmony_ci */ 59bf215546Sopenharmony_ci cl_ensure_space(&job->shader_rec, 60bf215546Sopenharmony_ci (12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws); 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci /* Uniforms are covered by vc4_write_uniforms(). */ 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci /* There could be up to 16 textures per stage, plus misc other 65bf215546Sopenharmony_ci * pointers. 66bf215546Sopenharmony_ci */ 67bf215546Sopenharmony_ci cl_ensure_space(&job->bo_handles, (2 * 16 + 20) * sizeof(uint32_t)); 68bf215546Sopenharmony_ci cl_ensure_space(&job->bo_pointers, 69bf215546Sopenharmony_ci (2 * 16 + 20) * sizeof(struct vc4_bo *)); 70bf215546Sopenharmony_ci} 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci/** 73bf215546Sopenharmony_ci * Does the initial bining command list setup for drawing to a given FBO. 74bf215546Sopenharmony_ci */ 75bf215546Sopenharmony_cistatic void 76bf215546Sopenharmony_civc4_start_draw(struct vc4_context *vc4) 77bf215546Sopenharmony_ci{ 78bf215546Sopenharmony_ci struct vc4_job *job = vc4->job; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci if (job->needs_flush) 81bf215546Sopenharmony_ci return; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci vc4_get_draw_cl_space(job, 0); 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION, bin) { 86bf215546Sopenharmony_ci bin.width_in_tiles = job->draw_tiles_x; 87bf215546Sopenharmony_ci bin.height_in_tiles = job->draw_tiles_y; 88bf215546Sopenharmony_ci bin.multisample_mode_4x = job->msaa; 89bf215546Sopenharmony_ci } 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci /* START_TILE_BINNING resets the statechange counters in the hardware, 92bf215546Sopenharmony_ci * which are what is used when a primitive is binned to a tile to 93bf215546Sopenharmony_ci * figure out what new state packets need to be written to that tile's 94bf215546Sopenharmony_ci * command list. 95bf215546Sopenharmony_ci */ 96bf215546Sopenharmony_ci cl_emit(&job->bcl, START_TILE_BINNING, start); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci /* Reset the current compressed primitives format. This gets modified 99bf215546Sopenharmony_ci * by VC4_PACKET_GL_INDEXED_PRIMITIVE and 100bf215546Sopenharmony_ci * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start 101bf215546Sopenharmony_ci * of every tile. 102bf215546Sopenharmony_ci */ 103bf215546Sopenharmony_ci cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list) { 104bf215546Sopenharmony_ci list.data_type = _16_BIT_INDEX; 105bf215546Sopenharmony_ci list.primitive_type = TRIANGLES_LIST; 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci job->needs_flush = true; 109bf215546Sopenharmony_ci job->draw_width = vc4->framebuffer.width; 110bf215546Sopenharmony_ci job->draw_height = vc4->framebuffer.height; 111bf215546Sopenharmony_ci} 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_cistatic void 114bf215546Sopenharmony_civc4_predraw_check_textures(struct pipe_context *pctx, 115bf215546Sopenharmony_ci struct vc4_texture_stateobj *stage_tex) 116bf215546Sopenharmony_ci{ 117bf215546Sopenharmony_ci struct vc4_context *vc4 = vc4_context(pctx); 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci for (int i = 0; i < stage_tex->num_textures; i++) { 120bf215546Sopenharmony_ci struct vc4_sampler_view *view = 121bf215546Sopenharmony_ci vc4_sampler_view(stage_tex->textures[i]); 122bf215546Sopenharmony_ci if (!view) 123bf215546Sopenharmony_ci continue; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci if (view->texture != view->base.texture) 126bf215546Sopenharmony_ci vc4_update_shadow_baselevel_texture(pctx, &view->base); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci vc4_flush_jobs_writing_resource(vc4, view->texture); 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci} 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_cistatic void 133bf215546Sopenharmony_civc4_emit_gl_shader_state(struct vc4_context *vc4, 134bf215546Sopenharmony_ci const struct pipe_draw_info *info, 135bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draws, 136bf215546Sopenharmony_ci uint32_t extra_index_bias) 137bf215546Sopenharmony_ci{ 138bf215546Sopenharmony_ci struct vc4_job *job = vc4->job; 139bf215546Sopenharmony_ci /* VC4_DIRTY_VTXSTATE */ 140bf215546Sopenharmony_ci struct vc4_vertex_stateobj *vtx = vc4->vtx; 141bf215546Sopenharmony_ci /* VC4_DIRTY_VTXBUF */ 142bf215546Sopenharmony_ci struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* The simulator throws a fit if VS or CS don't read an attribute, so 145bf215546Sopenharmony_ci * we emit a dummy read. 146bf215546Sopenharmony_ci */ 147bf215546Sopenharmony_ci uint32_t num_elements_emit = MAX2(vtx->num_elements, 1); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci /* Emit the shader record. */ 150bf215546Sopenharmony_ci cl_start_shader_reloc(&job->shader_rec, 3 + num_elements_emit); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci cl_emit(&job->shader_rec, SHADER_RECORD, rec) { 153bf215546Sopenharmony_ci rec.enable_clipping = true; 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci /* VC4_DIRTY_COMPILED_FS */ 156bf215546Sopenharmony_ci rec.fragment_shader_is_single_threaded = 157bf215546Sopenharmony_ci !vc4->prog.fs->fs_threaded; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */ 160bf215546Sopenharmony_ci rec.point_size_included_in_shaded_vertex_data = 161bf215546Sopenharmony_ci (info->mode == PIPE_PRIM_POINTS && 162bf215546Sopenharmony_ci vc4->rasterizer->base.point_size_per_vertex); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci /* VC4_DIRTY_COMPILED_FS */ 165bf215546Sopenharmony_ci rec.fragment_shader_number_of_varyings = 166bf215546Sopenharmony_ci vc4->prog.fs->num_inputs; 167bf215546Sopenharmony_ci rec.fragment_shader_code_address = 168bf215546Sopenharmony_ci cl_address(vc4->prog.fs->bo, 0); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci rec.coordinate_shader_attribute_array_select_bits = 171bf215546Sopenharmony_ci vc4->prog.cs->vattrs_live; 172bf215546Sopenharmony_ci rec.coordinate_shader_total_attributes_size = 173bf215546Sopenharmony_ci vc4->prog.cs->vattr_offsets[8]; 174bf215546Sopenharmony_ci rec.coordinate_shader_code_address = 175bf215546Sopenharmony_ci cl_address(vc4->prog.cs->bo, 0); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci rec.vertex_shader_attribute_array_select_bits = 178bf215546Sopenharmony_ci vc4->prog.vs->vattrs_live; 179bf215546Sopenharmony_ci rec.vertex_shader_total_attributes_size = 180bf215546Sopenharmony_ci vc4->prog.vs->vattr_offsets[8]; 181bf215546Sopenharmony_ci rec.vertex_shader_code_address = 182bf215546Sopenharmony_ci cl_address(vc4->prog.vs->bo, 0); 183bf215546Sopenharmony_ci }; 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci uint32_t max_index = 0xffff; 186bf215546Sopenharmony_ci unsigned index_bias = info->index_size ? draws->index_bias : 0; 187bf215546Sopenharmony_ci for (int i = 0; i < vtx->num_elements; i++) { 188bf215546Sopenharmony_ci struct pipe_vertex_element *elem = &vtx->pipe[i]; 189bf215546Sopenharmony_ci struct pipe_vertex_buffer *vb = 190bf215546Sopenharmony_ci &vertexbuf->vb[elem->vertex_buffer_index]; 191bf215546Sopenharmony_ci struct vc4_resource *rsc = vc4_resource(vb->buffer.resource); 192bf215546Sopenharmony_ci /* not vc4->dirty tracked: vc4->last_index_bias */ 193bf215546Sopenharmony_ci uint32_t offset = (vb->buffer_offset + 194bf215546Sopenharmony_ci elem->src_offset + 195bf215546Sopenharmony_ci vb->stride * (index_bias + 196bf215546Sopenharmony_ci extra_index_bias)); 197bf215546Sopenharmony_ci uint32_t vb_size = rsc->bo->size - offset; 198bf215546Sopenharmony_ci uint32_t elem_size = 199bf215546Sopenharmony_ci util_format_get_blocksize(elem->src_format); 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci cl_emit(&job->shader_rec, ATTRIBUTE_RECORD, attr) { 202bf215546Sopenharmony_ci attr.address = cl_address(rsc->bo, offset); 203bf215546Sopenharmony_ci attr.number_of_bytes_minus_1 = elem_size - 1; 204bf215546Sopenharmony_ci attr.stride = vb->stride; 205bf215546Sopenharmony_ci attr.coordinate_shader_vpm_offset = 206bf215546Sopenharmony_ci vc4->prog.cs->vattr_offsets[i]; 207bf215546Sopenharmony_ci attr.vertex_shader_vpm_offset = 208bf215546Sopenharmony_ci vc4->prog.vs->vattr_offsets[i]; 209bf215546Sopenharmony_ci } 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci if (vb->stride > 0) { 212bf215546Sopenharmony_ci max_index = MIN2(max_index, 213bf215546Sopenharmony_ci (vb_size - elem_size) / vb->stride); 214bf215546Sopenharmony_ci } 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci if (vtx->num_elements == 0) { 218bf215546Sopenharmony_ci assert(num_elements_emit == 1); 219bf215546Sopenharmony_ci struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO"); 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci cl_emit(&job->shader_rec, ATTRIBUTE_RECORD, attr) { 222bf215546Sopenharmony_ci attr.address = cl_address(bo, 0); 223bf215546Sopenharmony_ci attr.number_of_bytes_minus_1 = 16 - 1; 224bf215546Sopenharmony_ci attr.stride = 0; 225bf215546Sopenharmony_ci attr.coordinate_shader_vpm_offset = 0; 226bf215546Sopenharmony_ci attr.vertex_shader_vpm_offset = 0; 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci vc4_bo_unreference(&bo); 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) { 233bf215546Sopenharmony_ci /* Note that number of attributes == 0 in the packet means 8 234bf215546Sopenharmony_ci * attributes. This field also contains the offset into 235bf215546Sopenharmony_ci * shader_rec. 236bf215546Sopenharmony_ci */ 237bf215546Sopenharmony_ci assert(vtx->num_elements <= 8); 238bf215546Sopenharmony_ci shader_state.number_of_attribute_arrays = 239bf215546Sopenharmony_ci num_elements_emit & 0x7; 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci vc4_write_uniforms(vc4, vc4->prog.fs, 243bf215546Sopenharmony_ci &vc4->constbuf[PIPE_SHADER_FRAGMENT], 244bf215546Sopenharmony_ci &vc4->fragtex); 245bf215546Sopenharmony_ci vc4_write_uniforms(vc4, vc4->prog.vs, 246bf215546Sopenharmony_ci &vc4->constbuf[PIPE_SHADER_VERTEX], 247bf215546Sopenharmony_ci &vc4->verttex); 248bf215546Sopenharmony_ci vc4_write_uniforms(vc4, vc4->prog.cs, 249bf215546Sopenharmony_ci &vc4->constbuf[PIPE_SHADER_VERTEX], 250bf215546Sopenharmony_ci &vc4->verttex); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci vc4->last_index_bias = index_bias + extra_index_bias; 253bf215546Sopenharmony_ci vc4->max_index = max_index; 254bf215546Sopenharmony_ci job->shader_rec_count++; 255bf215546Sopenharmony_ci} 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci/** 258bf215546Sopenharmony_ci * HW-2116 workaround: Flush the batch before triggering the hardware state 259bf215546Sopenharmony_ci * counter wraparound behavior. 260bf215546Sopenharmony_ci * 261bf215546Sopenharmony_ci * State updates are tracked by a global counter which increments at the first 262bf215546Sopenharmony_ci * state update after a draw or a START_BINNING. Tiles can then have their 263bf215546Sopenharmony_ci * state updated at draw time with a set of cheap checks for whether the 264bf215546Sopenharmony_ci * state's copy of the global counter matches the global counter the last time 265bf215546Sopenharmony_ci * that state was written to the tile. 266bf215546Sopenharmony_ci * 267bf215546Sopenharmony_ci * The state counters are relatively small and wrap around quickly, so you 268bf215546Sopenharmony_ci * could get false negatives for needing to update a particular state in the 269bf215546Sopenharmony_ci * tile. To avoid this, the hardware attempts to write all of the state in 270bf215546Sopenharmony_ci * the tile at wraparound time. This apparently is broken, so we just flush 271bf215546Sopenharmony_ci * everything before that behavior is triggered. A batch flush is sufficient 272bf215546Sopenharmony_ci * to get our current contents drawn and reset the counters to 0. 273bf215546Sopenharmony_ci * 274bf215546Sopenharmony_ci * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the 275bf215546Sopenharmony_ci * tiles with VC4_PACKET_RETURN_FROM_LIST. 276bf215546Sopenharmony_ci */ 277bf215546Sopenharmony_cistatic void 278bf215546Sopenharmony_civc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count) 279bf215546Sopenharmony_ci{ 280bf215546Sopenharmony_ci struct vc4_context *vc4 = vc4_context(pctx); 281bf215546Sopenharmony_ci struct vc4_job *job = vc4_get_job_for_fbo(vc4); 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) { 284bf215546Sopenharmony_ci perf_debug("Flushing batch due to HW-2116 workaround " 285bf215546Sopenharmony_ci "(too many draw calls per scene\n"); 286bf215546Sopenharmony_ci vc4_job_submit(vc4, job); 287bf215546Sopenharmony_ci } 288bf215546Sopenharmony_ci} 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci/* A HW bug fails to draw 2-vert line loops. Just draw it as two GL_LINES. */ 291bf215546Sopenharmony_cistatic bool 292bf215546Sopenharmony_civc4_draw_workaround_line_loop_2(struct pipe_context *pctx, const struct pipe_draw_info *info, 293bf215546Sopenharmony_ci unsigned drawid_offset, 294bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 295bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draw) 296bf215546Sopenharmony_ci{ 297bf215546Sopenharmony_ci if (draw->count != 2 || info->mode != PIPE_PRIM_LINE_LOOP) 298bf215546Sopenharmony_ci return false; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci struct pipe_draw_info local_info = *info; 301bf215546Sopenharmony_ci local_info.mode = PIPE_PRIM_LINES; 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci /* Draw twice. The vertex order will be wrong on the second prim, but 304bf215546Sopenharmony_ci * that's probably not worth rewriting an index buffer over. 305bf215546Sopenharmony_ci */ 306bf215546Sopenharmony_ci for (int i = 0; i < 2; i++) 307bf215546Sopenharmony_ci pctx->draw_vbo(pctx, &local_info, drawid_offset, indirect, draw, 1); 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci return true; 310bf215546Sopenharmony_ci} 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_cistatic void 313bf215546Sopenharmony_civc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, 314bf215546Sopenharmony_ci unsigned drawid_offset, 315bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 316bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draws, 317bf215546Sopenharmony_ci unsigned num_draws) 318bf215546Sopenharmony_ci{ 319bf215546Sopenharmony_ci if (num_draws > 1) { 320bf215546Sopenharmony_ci util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws); 321bf215546Sopenharmony_ci return; 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci if (!indirect && (!draws[0].count || !info->instance_count)) 325bf215546Sopenharmony_ci return; 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci struct vc4_context *vc4 = vc4_context(pctx); 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci if (!indirect && 330bf215546Sopenharmony_ci !info->primitive_restart && 331bf215546Sopenharmony_ci !u_trim_pipe_prim(info->mode, (unsigned*)&draws[0].count)) 332bf215546Sopenharmony_ci return; 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci if (vc4_draw_workaround_line_loop_2(pctx, info, drawid_offset, indirect, draws)) 335bf215546Sopenharmony_ci return; 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci /* Before setting up the draw, do any fixup blits necessary. */ 338bf215546Sopenharmony_ci vc4_predraw_check_textures(pctx, &vc4->verttex); 339bf215546Sopenharmony_ci vc4_predraw_check_textures(pctx, &vc4->fragtex); 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci vc4_hw_2116_workaround(pctx, draws[0].count); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci struct vc4_job *job = vc4_get_job_for_fbo(vc4); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci /* Make sure that the raster order flags haven't changed, which can 346bf215546Sopenharmony_ci * only be set at job granularity. 347bf215546Sopenharmony_ci */ 348bf215546Sopenharmony_ci if (job->flags != vc4->rasterizer->tile_raster_order_flags) { 349bf215546Sopenharmony_ci vc4_job_submit(vc4, job); 350bf215546Sopenharmony_ci job = vc4_get_job_for_fbo(vc4); 351bf215546Sopenharmony_ci } 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci vc4_get_draw_cl_space(job, draws[0].count); 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci if (vc4->prim_mode != info->mode) { 356bf215546Sopenharmony_ci vc4->prim_mode = info->mode; 357bf215546Sopenharmony_ci vc4->dirty |= VC4_DIRTY_PRIM_MODE; 358bf215546Sopenharmony_ci } 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci vc4_start_draw(vc4); 361bf215546Sopenharmony_ci if (!vc4_update_compiled_shaders(vc4, info->mode)) { 362bf215546Sopenharmony_ci debug_warn_once("shader compile failed, skipping draw call.\n"); 363bf215546Sopenharmony_ci return; 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci vc4_emit_state(pctx); 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci bool needs_drawarrays_shader_state = false; 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci unsigned index_bias = info->index_size ? draws->index_bias : 0; 371bf215546Sopenharmony_ci if ((vc4->dirty & (VC4_DIRTY_VTXBUF | 372bf215546Sopenharmony_ci VC4_DIRTY_VTXSTATE | 373bf215546Sopenharmony_ci VC4_DIRTY_PRIM_MODE | 374bf215546Sopenharmony_ci VC4_DIRTY_RASTERIZER | 375bf215546Sopenharmony_ci VC4_DIRTY_COMPILED_CS | 376bf215546Sopenharmony_ci VC4_DIRTY_COMPILED_VS | 377bf215546Sopenharmony_ci VC4_DIRTY_COMPILED_FS | 378bf215546Sopenharmony_ci vc4->prog.cs->uniform_dirty_bits | 379bf215546Sopenharmony_ci vc4->prog.vs->uniform_dirty_bits | 380bf215546Sopenharmony_ci vc4->prog.fs->uniform_dirty_bits)) || 381bf215546Sopenharmony_ci vc4->last_index_bias != index_bias) { 382bf215546Sopenharmony_ci if (info->index_size) 383bf215546Sopenharmony_ci vc4_emit_gl_shader_state(vc4, info, draws, 0); 384bf215546Sopenharmony_ci else 385bf215546Sopenharmony_ci needs_drawarrays_shader_state = true; 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci vc4->dirty = 0; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci /* Note that the primitive type fields match with OpenGL/gallium 391bf215546Sopenharmony_ci * definitions, up to but not including QUADS. 392bf215546Sopenharmony_ci */ 393bf215546Sopenharmony_ci if (info->index_size) { 394bf215546Sopenharmony_ci uint32_t index_size = info->index_size; 395bf215546Sopenharmony_ci uint32_t offset = draws[0].start * index_size; 396bf215546Sopenharmony_ci struct pipe_resource *prsc; 397bf215546Sopenharmony_ci if (info->index_size == 4) { 398bf215546Sopenharmony_ci prsc = vc4_get_shadow_index_buffer(pctx, info, 399bf215546Sopenharmony_ci offset, 400bf215546Sopenharmony_ci draws[0].count, &offset); 401bf215546Sopenharmony_ci index_size = 2; 402bf215546Sopenharmony_ci } else { 403bf215546Sopenharmony_ci if (info->has_user_indices) { 404bf215546Sopenharmony_ci unsigned start_offset = draws[0].start * info->index_size; 405bf215546Sopenharmony_ci prsc = NULL; 406bf215546Sopenharmony_ci u_upload_data(vc4->uploader, start_offset, 407bf215546Sopenharmony_ci draws[0].count * index_size, 4, 408bf215546Sopenharmony_ci (char*)info->index.user + start_offset, 409bf215546Sopenharmony_ci &offset, &prsc); 410bf215546Sopenharmony_ci } else { 411bf215546Sopenharmony_ci prsc = info->index.resource; 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci struct vc4_resource *rsc = vc4_resource(prsc); 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci struct vc4_cl_out *bcl = cl_start(&job->bcl); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci /* The original design for the VC4 kernel UABI had multiple 419bf215546Sopenharmony_ci * packets that used relocations in the BCL (some of which 420bf215546Sopenharmony_ci * needed two BOs), but later modifications eliminated all but 421bf215546Sopenharmony_ci * this one usage. We have an arbitrary 32-bit offset value, 422bf215546Sopenharmony_ci * and need to also supply an arbitrary 32-bit index buffer 423bf215546Sopenharmony_ci * GEM handle, so we have this fake packet we emit in our BCL 424bf215546Sopenharmony_ci * to be validated, which the kernel uses at validation time 425bf215546Sopenharmony_ci * to perform the relocation in the IB packet (without 426bf215546Sopenharmony_ci * emitting to the actual HW). 427bf215546Sopenharmony_ci */ 428bf215546Sopenharmony_ci uint32_t hindex = vc4_gem_hindex(job, rsc->bo); 429bf215546Sopenharmony_ci if (job->last_gem_handle_hindex != hindex) { 430bf215546Sopenharmony_ci cl_u8(&bcl, VC4_PACKET_GEM_HANDLES); 431bf215546Sopenharmony_ci cl_u32(&bcl, hindex); 432bf215546Sopenharmony_ci cl_u32(&bcl, 0); 433bf215546Sopenharmony_ci job->last_gem_handle_hindex = hindex; 434bf215546Sopenharmony_ci } 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); 437bf215546Sopenharmony_ci cl_u8(&bcl, 438bf215546Sopenharmony_ci info->mode | 439bf215546Sopenharmony_ci (index_size == 2 ? 440bf215546Sopenharmony_ci VC4_INDEX_BUFFER_U16: 441bf215546Sopenharmony_ci VC4_INDEX_BUFFER_U8)); 442bf215546Sopenharmony_ci cl_u32(&bcl, draws[0].count); 443bf215546Sopenharmony_ci cl_u32(&bcl, offset); 444bf215546Sopenharmony_ci cl_u32(&bcl, vc4->max_index); 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci cl_end(&job->bcl, bcl); 447bf215546Sopenharmony_ci job->draw_calls_queued++; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci if (info->index_size == 4 || info->has_user_indices) 450bf215546Sopenharmony_ci pipe_resource_reference(&prsc, NULL); 451bf215546Sopenharmony_ci } else { 452bf215546Sopenharmony_ci uint32_t count = draws[0].count; 453bf215546Sopenharmony_ci uint32_t start = draws[0].start; 454bf215546Sopenharmony_ci uint32_t extra_index_bias = 0; 455bf215546Sopenharmony_ci static const uint32_t max_verts = 65535; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci /* GFXH-515 / SW-5891: The binner emits 16 bit indices for 458bf215546Sopenharmony_ci * drawarrays, which means that if start + count > 64k it 459bf215546Sopenharmony_ci * would truncate the top bits. Work around this by emitting 460bf215546Sopenharmony_ci * a limited number of primitives at a time and reemitting the 461bf215546Sopenharmony_ci * shader state pointing farther down the vertex attribute 462bf215546Sopenharmony_ci * arrays. 463bf215546Sopenharmony_ci * 464bf215546Sopenharmony_ci * To do this properly for line loops or trifans, we'd need to 465bf215546Sopenharmony_ci * make a new VB containing the first vertex plus whatever 466bf215546Sopenharmony_ci * remainder. 467bf215546Sopenharmony_ci */ 468bf215546Sopenharmony_ci if (start + count > max_verts) { 469bf215546Sopenharmony_ci extra_index_bias = start; 470bf215546Sopenharmony_ci start = 0; 471bf215546Sopenharmony_ci needs_drawarrays_shader_state = true; 472bf215546Sopenharmony_ci } 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci while (count) { 475bf215546Sopenharmony_ci uint32_t this_count = count; 476bf215546Sopenharmony_ci uint32_t step; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci if (needs_drawarrays_shader_state) { 479bf215546Sopenharmony_ci vc4_emit_gl_shader_state(vc4, info, draws, 480bf215546Sopenharmony_ci extra_index_bias); 481bf215546Sopenharmony_ci } 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci u_split_draw(info, max_verts, &this_count, &step); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) { 486bf215546Sopenharmony_ci array.primitive_mode = info->mode; 487bf215546Sopenharmony_ci array.length = this_count; 488bf215546Sopenharmony_ci array.index_of_first_vertex = start; 489bf215546Sopenharmony_ci } 490bf215546Sopenharmony_ci job->draw_calls_queued++; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci count -= step; 493bf215546Sopenharmony_ci extra_index_bias += start + step; 494bf215546Sopenharmony_ci start = 0; 495bf215546Sopenharmony_ci needs_drawarrays_shader_state = true; 496bf215546Sopenharmony_ci } 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci /* We shouldn't have tripped the HW_2116 bug with the GFXH-515 500bf215546Sopenharmony_ci * workaround. 501bf215546Sopenharmony_ci */ 502bf215546Sopenharmony_ci assert(job->draw_calls_queued <= VC4_HW_2116_COUNT); 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci if (vc4->zsa && vc4->framebuffer.zsbuf) { 505bf215546Sopenharmony_ci struct vc4_resource *rsc = 506bf215546Sopenharmony_ci vc4_resource(vc4->framebuffer.zsbuf->texture); 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci if (vc4->zsa->base.depth_enabled) { 509bf215546Sopenharmony_ci job->resolve |= PIPE_CLEAR_DEPTH; 510bf215546Sopenharmony_ci rsc->initialized_buffers = PIPE_CLEAR_DEPTH; 511bf215546Sopenharmony_ci } 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci if (vc4->zsa->base.stencil[0].enabled) { 514bf215546Sopenharmony_ci job->resolve |= PIPE_CLEAR_STENCIL; 515bf215546Sopenharmony_ci rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; 516bf215546Sopenharmony_ci } 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci job->resolve |= PIPE_CLEAR_COLOR0; 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci /* If we've used half of the presumably 256MB CMA area, flush the job 522bf215546Sopenharmony_ci * so that we don't accumulate a job that will end up not being 523bf215546Sopenharmony_ci * executable. 524bf215546Sopenharmony_ci */ 525bf215546Sopenharmony_ci if (job->bo_space > 128 * 1024 * 1024) 526bf215546Sopenharmony_ci vc4_flush(pctx); 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH) 529bf215546Sopenharmony_ci vc4_flush(pctx); 530bf215546Sopenharmony_ci} 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_cistatic uint32_t 533bf215546Sopenharmony_cipack_rgba(enum pipe_format format, const float *rgba) 534bf215546Sopenharmony_ci{ 535bf215546Sopenharmony_ci union util_color uc; 536bf215546Sopenharmony_ci util_pack_color(rgba, format, &uc); 537bf215546Sopenharmony_ci if (util_format_get_blocksize(format) == 2) 538bf215546Sopenharmony_ci return uc.us; 539bf215546Sopenharmony_ci else 540bf215546Sopenharmony_ci return uc.ui[0]; 541bf215546Sopenharmony_ci} 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_cistatic void 544bf215546Sopenharmony_civc4_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state, 545bf215546Sopenharmony_ci const union pipe_color_union *color, double depth, unsigned stencil) 546bf215546Sopenharmony_ci{ 547bf215546Sopenharmony_ci struct vc4_context *vc4 = vc4_context(pctx); 548bf215546Sopenharmony_ci struct vc4_job *job = vc4_get_job_for_fbo(vc4); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { 551bf215546Sopenharmony_ci struct vc4_resource *rsc = 552bf215546Sopenharmony_ci vc4_resource(vc4->framebuffer.zsbuf->texture); 553bf215546Sopenharmony_ci unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci /* Clearing ZS will clear both Z and stencil, so if we're 556bf215546Sopenharmony_ci * trying to clear just one then we need to draw a quad to do 557bf215546Sopenharmony_ci * it instead. We need to do this before setting up 558bf215546Sopenharmony_ci * tile-based clears in vc4->job, because the blitter may 559bf215546Sopenharmony_ci * submit the current job. 560bf215546Sopenharmony_ci */ 561bf215546Sopenharmony_ci if ((zsclear == PIPE_CLEAR_DEPTH || 562bf215546Sopenharmony_ci zsclear == PIPE_CLEAR_STENCIL) && 563bf215546Sopenharmony_ci (rsc->initialized_buffers & ~(zsclear | job->cleared)) && 564bf215546Sopenharmony_ci util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) { 565bf215546Sopenharmony_ci static const union pipe_color_union dummy_color = {}; 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci perf_debug("Partial clear of Z+stencil buffer, " 568bf215546Sopenharmony_ci "drawing a quad instead of fast clearing\n"); 569bf215546Sopenharmony_ci vc4_blitter_save(vc4); 570bf215546Sopenharmony_ci util_blitter_clear(vc4->blitter, 571bf215546Sopenharmony_ci vc4->framebuffer.width, 572bf215546Sopenharmony_ci vc4->framebuffer.height, 573bf215546Sopenharmony_ci 1, 574bf215546Sopenharmony_ci zsclear, 575bf215546Sopenharmony_ci &dummy_color, depth, stencil, 576bf215546Sopenharmony_ci false); 577bf215546Sopenharmony_ci buffers &= ~zsclear; 578bf215546Sopenharmony_ci if (!buffers) 579bf215546Sopenharmony_ci return; 580bf215546Sopenharmony_ci job = vc4_get_job_for_fbo(vc4); 581bf215546Sopenharmony_ci } 582bf215546Sopenharmony_ci } 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci /* We can't flag new buffers for clearing once we've queued draws. We 585bf215546Sopenharmony_ci * could avoid this by using the 3d engine to clear. 586bf215546Sopenharmony_ci */ 587bf215546Sopenharmony_ci if (job->draw_calls_queued) { 588bf215546Sopenharmony_ci perf_debug("Flushing rendering to process new clear.\n"); 589bf215546Sopenharmony_ci vc4_job_submit(vc4, job); 590bf215546Sopenharmony_ci job = vc4_get_job_for_fbo(vc4); 591bf215546Sopenharmony_ci } 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci if (buffers & PIPE_CLEAR_COLOR0) { 594bf215546Sopenharmony_ci struct vc4_resource *rsc = 595bf215546Sopenharmony_ci vc4_resource(vc4->framebuffer.cbufs[0]->texture); 596bf215546Sopenharmony_ci uint32_t clear_color; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci if (vc4_rt_format_is_565(vc4->framebuffer.cbufs[0]->format)) { 599bf215546Sopenharmony_ci /* In 565 mode, the hardware will be packing our color 600bf215546Sopenharmony_ci * for us. 601bf215546Sopenharmony_ci */ 602bf215546Sopenharmony_ci clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, 603bf215546Sopenharmony_ci color->f); 604bf215546Sopenharmony_ci } else { 605bf215546Sopenharmony_ci /* Otherwise, we need to do this packing because we 606bf215546Sopenharmony_ci * support multiple swizzlings of RGBA8888. 607bf215546Sopenharmony_ci */ 608bf215546Sopenharmony_ci clear_color = 609bf215546Sopenharmony_ci pack_rgba(vc4->framebuffer.cbufs[0]->format, 610bf215546Sopenharmony_ci color->f); 611bf215546Sopenharmony_ci } 612bf215546Sopenharmony_ci job->clear_color[0] = job->clear_color[1] = clear_color; 613bf215546Sopenharmony_ci rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0); 614bf215546Sopenharmony_ci } 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { 617bf215546Sopenharmony_ci struct vc4_resource *rsc = 618bf215546Sopenharmony_ci vc4_resource(vc4->framebuffer.zsbuf->texture); 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci /* Though the depth buffer is stored with Z in the high 24, 621bf215546Sopenharmony_ci * for this field we just need to store it in the low 24. 622bf215546Sopenharmony_ci */ 623bf215546Sopenharmony_ci if (buffers & PIPE_CLEAR_DEPTH) { 624bf215546Sopenharmony_ci job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, 625bf215546Sopenharmony_ci depth); 626bf215546Sopenharmony_ci } 627bf215546Sopenharmony_ci if (buffers & PIPE_CLEAR_STENCIL) 628bf215546Sopenharmony_ci job->clear_stencil = stencil; 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci rsc->initialized_buffers |= (buffers & PIPE_CLEAR_DEPTHSTENCIL); 631bf215546Sopenharmony_ci } 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci job->draw_min_x = 0; 634bf215546Sopenharmony_ci job->draw_min_y = 0; 635bf215546Sopenharmony_ci job->draw_max_x = vc4->framebuffer.width; 636bf215546Sopenharmony_ci job->draw_max_y = vc4->framebuffer.height; 637bf215546Sopenharmony_ci job->cleared |= buffers; 638bf215546Sopenharmony_ci job->resolve |= buffers; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci vc4_start_draw(vc4); 641bf215546Sopenharmony_ci} 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_cistatic void 644bf215546Sopenharmony_civc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, 645bf215546Sopenharmony_ci const union pipe_color_union *color, 646bf215546Sopenharmony_ci unsigned x, unsigned y, unsigned w, unsigned h, 647bf215546Sopenharmony_ci bool render_condition_enabled) 648bf215546Sopenharmony_ci{ 649bf215546Sopenharmony_ci fprintf(stderr, "unimpl: clear RT\n"); 650bf215546Sopenharmony_ci} 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_cistatic void 653bf215546Sopenharmony_civc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, 654bf215546Sopenharmony_ci unsigned buffers, double depth, unsigned stencil, 655bf215546Sopenharmony_ci unsigned x, unsigned y, unsigned w, unsigned h, 656bf215546Sopenharmony_ci bool render_condition_enabled) 657bf215546Sopenharmony_ci{ 658bf215546Sopenharmony_ci fprintf(stderr, "unimpl: clear DS\n"); 659bf215546Sopenharmony_ci} 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_civoid 662bf215546Sopenharmony_civc4_draw_init(struct pipe_context *pctx) 663bf215546Sopenharmony_ci{ 664bf215546Sopenharmony_ci pctx->draw_vbo = vc4_draw_vbo; 665bf215546Sopenharmony_ci pctx->clear = vc4_clear; 666bf215546Sopenharmony_ci pctx->clear_render_target = vc4_clear_render_target; 667bf215546Sopenharmony_ci pctx->clear_depth_stencil = vc4_clear_depth_stencil; 668bf215546Sopenharmony_ci} 669