1/************************************************************************** 2 * 3 * Copyright 2003 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "i915_batch.h" 29#include "i915_context.h" 30#include "i915_debug.h" 31#include "i915_fpc.h" 32#include "i915_reg.h" 33#include "i915_resource.h" 34 35#include "pipe/p_context.h" 36#include "pipe/p_defines.h" 37#include "pipe/p_format.h" 38 39#include "util/format/u_format.h" 40#include "util/u_math.h" 41#include "util/u_memory.h" 42 43struct i915_tracked_hw_state { 44 const char *name; 45 void (*validate)(struct i915_context *, unsigned *batch_space); 46 void (*emit)(struct i915_context *); 47 unsigned dirty, batch_space; 48}; 49 50static void 51validate_flush(struct i915_context *i915, unsigned *batch_space) 52{ 53 *batch_space = i915->flush_dirty ? 1 : 0; 54} 55 56static void 57emit_flush(struct i915_context *i915) 58{ 59 /* Cache handling is very cheap atm. State handling can request to flushes: 60 * - I915_FLUSH_CACHE which is a flush everything request and 61 * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush. 62 * Because the cache handling is so dumb, no explicit "invalidate map cache". 63 * Also, the first is a strict superset of the latter, so the following logic 64 * works. */ 65 if (i915->flush_dirty & I915_FLUSH_CACHE) 66 OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE); 67 else if (i915->flush_dirty & I915_PIPELINE_FLUSH) 68 OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); 69} 70 71uint32_t invariant_state[] = { 72 _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 | 73 AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0, 74 75 _3DSTATE_DFLT_DIFFUSE_CMD, 0, 76 77 _3DSTATE_DFLT_SPEC_CMD, 0, 78 79 _3DSTATE_DFLT_Z_CMD, 0, 80 81 _3DSTATE_COORD_SET_BINDINGS | CSB_TCB(0, 0) | CSB_TCB(1, 1) | CSB_TCB(2, 2) | 82 CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | 83 CSB_TCB(7, 7), 84 85 _3DSTATE_RASTER_RULES_CMD | ENABLE_POINT_RASTER_RULE | 86 OGL_POINT_RASTER_RULE | ENABLE_LINE_STRIP_PROVOKE_VRTX | 87 ENABLE_TRI_FAN_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX(1) | 88 TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D, 89 90 _3DSTATE_DEPTH_SUBRECT_DISABLE, 91 92 /* disable indirect state for now 93 */ 94 _3DSTATE_LOAD_INDIRECT | 0, 0}; 95 96static void 97emit_invariant(struct i915_context *i915) 98{ 99 i915_winsys_batchbuffer_write( 100 i915->batch, invariant_state, 101 ARRAY_SIZE(invariant_state) * sizeof(uint32_t)); 102} 103 104static void 105validate_immediate(struct i915_context *i915, unsigned *batch_space) 106{ 107 unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | 108 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | 109 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | 110 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & 111 i915->immediate_dirty; 112 113 if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo) 114 i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; 115 116 *batch_space = 1 + util_bitcount(dirty); 117} 118 119static void 120emit_immediate_s5(struct i915_context *i915, uint32_t imm) 121{ 122 struct i915_surface *surf = i915_surface(i915->framebuffer.cbufs[0]); 123 124 if (surf) { 125 uint32_t writemask = imm & S5_WRITEDISABLE_MASK; 126 imm &= ~S5_WRITEDISABLE_MASK; 127 128 /* The register bits are not in order. */ 129 static const uint32_t writedisables[4] = { 130 S5_WRITEDISABLE_RED, 131 S5_WRITEDISABLE_GREEN, 132 S5_WRITEDISABLE_BLUE, 133 S5_WRITEDISABLE_ALPHA, 134 }; 135 136 for (int i = 0; i < 4; i++) { 137 if (writemask & writedisables[surf->color_swizzle[i]]) 138 imm |= writedisables[i]; 139 } 140 } 141 142 OUT_BATCH(imm); 143} 144 145static void 146emit_immediate(struct i915_context *i915) 147{ 148 /* remove unwanted bits and S7 */ 149 unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | 150 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | 151 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | 152 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & 153 i915->immediate_dirty; 154 int i, num = util_bitcount(dirty); 155 assert(num && num <= I915_MAX_IMMEDIATE); 156 157 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | dirty << 4 | (num - 1)); 158 159 if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { 160 if (i915->vbo) 161 OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, 162 i915->current.immediate[I915_IMMEDIATE_S0]); 163 else 164 OUT_BATCH(0); 165 } 166 167 for (i = 1; i < I915_MAX_IMMEDIATE; i++) { 168 if (dirty & (1 << i)) { 169 if (i == I915_IMMEDIATE_S5) 170 emit_immediate_s5(i915, i915->current.immediate[i]); 171 else 172 OUT_BATCH(i915->current.immediate[i]); 173 } 174 } 175} 176 177static void 178validate_dynamic(struct i915_context *i915, unsigned *batch_space) 179{ 180 *batch_space = 181 util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1)); 182} 183 184static void 185emit_dynamic(struct i915_context *i915) 186{ 187 int i; 188 for (i = 0; i < I915_MAX_DYNAMIC; i++) { 189 if (i915->dynamic_dirty & (1 << i)) 190 OUT_BATCH(i915->current.dynamic[i]); 191 } 192} 193 194static void 195validate_static(struct i915_context *i915, unsigned *batch_space) 196{ 197 *batch_space = 0; 198 199 if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { 200 i915->validation_buffers[i915->num_validation_buffers++] = 201 i915->current.cbuf_bo; 202 *batch_space += 3; 203 } 204 205 if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { 206 i915->validation_buffers[i915->num_validation_buffers++] = 207 i915->current.depth_bo; 208 *batch_space += 3; 209 } 210 211 if (i915->static_dirty & I915_DST_VARS) 212 *batch_space += 2; 213 214 if (i915->static_dirty & I915_DST_RECT) 215 *batch_space += 5; 216} 217 218static void 219emit_static(struct i915_context *i915) 220{ 221 if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { 222 OUT_BATCH(_3DSTATE_BUF_INFO_CMD); 223 OUT_BATCH(i915->current.cbuf_flags); 224 OUT_RELOC(i915->current.cbuf_bo, I915_USAGE_RENDER, 0); 225 } 226 227 /* What happens if no zbuf?? 228 */ 229 if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { 230 OUT_BATCH(_3DSTATE_BUF_INFO_CMD); 231 OUT_BATCH(i915->current.depth_flags); 232 OUT_RELOC(i915->current.depth_bo, I915_USAGE_RENDER, 0); 233 } 234 235 if (i915->static_dirty & I915_DST_VARS) { 236 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); 237 OUT_BATCH(i915->current.dst_buf_vars); 238 } 239} 240 241static void 242validate_map(struct i915_context *i915, unsigned *batch_space) 243{ 244 const uint32_t enabled = i915->current.sampler_enable_flags; 245 uint32_t unit; 246 struct i915_texture *tex; 247 248 *batch_space = i915->current.sampler_enable_nr 249 ? 2 + 3 * i915->current.sampler_enable_nr 250 : 0; 251 252 for (unit = 0; unit < I915_TEX_UNITS; unit++) { 253 if (enabled & (1 << unit)) { 254 tex = i915_texture(i915->fragment_sampler_views[unit]->texture); 255 i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; 256 } 257 } 258} 259 260static void 261emit_map(struct i915_context *i915) 262{ 263 const uint32_t nr = i915->current.sampler_enable_nr; 264 if (nr) { 265 const uint32_t enabled = i915->current.sampler_enable_flags; 266 uint32_t unit; 267 uint32_t count = 0; 268 OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); 269 OUT_BATCH(enabled); 270 for (unit = 0; unit < I915_TEX_UNITS; unit++) { 271 if (enabled & (1 << unit)) { 272 struct i915_texture *texture = 273 i915_texture(i915->fragment_sampler_views[unit]->texture); 274 struct i915_winsys_buffer *buf = texture->buffer; 275 unsigned offset = i915->current.texbuffer[unit][2]; 276 277 assert(buf); 278 279 count++; 280 281 OUT_RELOC(buf, I915_USAGE_SAMPLER, offset); 282 OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ 283 OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ 284 } 285 } 286 assert(count == nr); 287 } 288} 289 290static void 291validate_sampler(struct i915_context *i915, unsigned *batch_space) 292{ 293 *batch_space = i915->current.sampler_enable_nr 294 ? 2 + 3 * i915->current.sampler_enable_nr 295 : 0; 296} 297 298static void 299emit_sampler(struct i915_context *i915) 300{ 301 if (i915->current.sampler_enable_nr) { 302 int i; 303 304 OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * i915->current.sampler_enable_nr)); 305 306 OUT_BATCH(i915->current.sampler_enable_flags); 307 308 for (i = 0; i < I915_TEX_UNITS; i++) { 309 if (i915->current.sampler_enable_flags & (1 << i)) { 310 OUT_BATCH(i915->current.sampler[i][0]); 311 OUT_BATCH(i915->current.sampler[i][1]); 312 OUT_BATCH(i915->current.sampler[i][2]); 313 } 314 } 315 } 316} 317 318static void 319validate_constants(struct i915_context *i915, unsigned *batch_space) 320{ 321 int nr = i915->fs->num_constants ? 2 + 4 * i915->fs->num_constants : 0; 322 323 *batch_space = nr; 324} 325 326static void 327emit_constants(struct i915_context *i915) 328{ 329 /* Collate the user-defined constants with the fragment shader's 330 * immediates according to the constant_flags[] array. 331 */ 332 const uint32_t nr = i915->fs->num_constants; 333 334 assert(nr <= I915_MAX_CONSTANT); 335 if (nr) { 336 uint32_t i; 337 338 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4)); 339 OUT_BATCH((1 << nr) - 1); 340 341 for (i = 0; i < nr; i++) { 342 const uint32_t *c; 343 if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { 344 /* grab user-defined constant */ 345 c = (uint32_t *)i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT]) 346 ->data; 347 c += 4 * i; 348 } else { 349 /* emit program constant */ 350 c = (uint32_t *)i915->fs->constants[i]; 351 } 352#if 0 /* debug */ 353 { 354 float *f = (float *) c; 355 printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], 356 (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER 357 ? "user" : "immediate")); 358 } 359#endif 360 OUT_BATCH(*c++); 361 OUT_BATCH(*c++); 362 OUT_BATCH(*c++); 363 OUT_BATCH(*c++); 364 } 365 } 366} 367 368static void 369validate_program(struct i915_context *i915, unsigned *batch_space) 370{ 371 /* we need more batch space if we want to emulate rgba framebuffers */ 372 *batch_space = i915->fs->program_len + (i915->current.fixup_swizzle ? 3 : 0); 373} 374 375static void 376emit_program(struct i915_context *i915) 377{ 378 /* we should always have, at least, a pass-through program */ 379 assert(i915->fs->program_len > 0); 380 381 /* If we're doing a fixup swizzle, that's 3 more dwords to add. */ 382 uint32_t additional_size = 0; 383 if (i915->current.fixup_swizzle) 384 additional_size = 3; 385 386 /* output the program: 1 dword of header, then 3 dwords per decl/instruction */ 387 assert(i915->fs->program_len % 3 == 1); 388 389 /* first word has the size, adjust it for fixup swizzle */ 390 OUT_BATCH(i915->fs->program[0] + additional_size); 391 392 for (int i = 1; i < i915->fs->program_len; i++) 393 OUT_BATCH(i915->fs->program[i]); 394 395 /* we emit an additional mov with swizzle to fake RGBA framebuffers */ 396 if (i915->current.fixup_swizzle) { 397 /* mov out_color, out_color.zyxw */ 398 OUT_BATCH(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | 399 A0_DEST_CHANNEL_ALL | (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | 400 (T_DIFFUSE << A0_SRC0_NR_SHIFT)); 401 OUT_BATCH(i915->current.fixup_swizzle); 402 OUT_BATCH(0); 403 } 404} 405 406static void 407emit_draw_rect(struct i915_context *i915) 408{ 409 if (i915->static_dirty & I915_DST_RECT) { 410 OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); 411 OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); 412 OUT_BATCH(i915->current.draw_offset); 413 OUT_BATCH(i915->current.draw_size); 414 OUT_BATCH(i915->current.draw_offset); 415 } 416} 417 418static bool 419i915_validate_state(struct i915_context *i915, unsigned *batch_space) 420{ 421 unsigned tmp; 422 423 i915->num_validation_buffers = 0; 424 if (i915->hardware_dirty & I915_HW_INVARIANT) 425 *batch_space = ARRAY_SIZE(invariant_state); 426 else 427 *batch_space = 0; 428 429#if 0 430static int counter_total = 0; 431#define VALIDATE_ATOM(atom, hw_dirty) \ 432 if (i915->hardware_dirty & hw_dirty) { \ 433 static int counter_##atom = 0; \ 434 validate_##atom(i915, &tmp); \ 435 *batch_space += tmp; \ 436 counter_##atom += tmp; \ 437 counter_total += tmp; \ 438 printf("%s: \t%d/%d \t%2.2f\n", #atom, counter_##atom, counter_total, \ 439 counter_##atom * 100.f / counter_total); \ 440 } 441#else 442#define VALIDATE_ATOM(atom, hw_dirty) \ 443 if (i915->hardware_dirty & hw_dirty) { \ 444 validate_##atom(i915, &tmp); \ 445 *batch_space += tmp; \ 446 } 447#endif 448 VALIDATE_ATOM(flush, I915_HW_FLUSH); 449 VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE); 450 VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC); 451 VALIDATE_ATOM(static, I915_HW_STATIC); 452 VALIDATE_ATOM(map, I915_HW_MAP); 453 VALIDATE_ATOM(sampler, I915_HW_SAMPLER); 454 VALIDATE_ATOM(constants, I915_HW_CONSTANTS); 455 VALIDATE_ATOM(program, I915_HW_PROGRAM); 456#undef VALIDATE_ATOM 457 458 if (i915->num_validation_buffers == 0) 459 return true; 460 461 if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, 462 i915->num_validation_buffers)) 463 return false; 464 465 return true; 466} 467 468/* Push the state into the sarea and/or texture memory. 469 */ 470void 471i915_emit_hardware_state(struct i915_context *i915) 472{ 473 unsigned batch_space; 474 uintptr_t save_ptr; 475 476 assert(i915->dirty == 0); 477 478 if (I915_DBG_ON(DBG_ATOMS)) 479 i915_dump_hardware_dirty(i915, __FUNCTION__); 480 481 if (!i915_validate_state(i915, &batch_space)) { 482 FLUSH_BATCH(NULL, I915_FLUSH_ASYNC); 483 assert(i915_validate_state(i915, &batch_space)); 484 } 485 486 if (!BEGIN_BATCH(batch_space)) { 487 FLUSH_BATCH(NULL, I915_FLUSH_ASYNC); 488 assert(i915_validate_state(i915, &batch_space)); 489 assert(BEGIN_BATCH(batch_space)); 490 } 491 492 save_ptr = (uintptr_t)i915->batch->ptr; 493 494#define EMIT_ATOM(atom, hw_dirty) \ 495 if (i915->hardware_dirty & hw_dirty) \ 496 emit_##atom(i915); 497 EMIT_ATOM(flush, I915_HW_FLUSH); 498 EMIT_ATOM(invariant, I915_HW_INVARIANT); 499 EMIT_ATOM(immediate, I915_HW_IMMEDIATE); 500 EMIT_ATOM(dynamic, I915_HW_DYNAMIC); 501 EMIT_ATOM(static, I915_HW_STATIC); 502 EMIT_ATOM(map, I915_HW_MAP); 503 EMIT_ATOM(sampler, I915_HW_SAMPLER); 504 EMIT_ATOM(constants, I915_HW_CONSTANTS); 505 EMIT_ATOM(program, I915_HW_PROGRAM); 506 EMIT_ATOM(draw_rect, I915_HW_STATIC); 507#undef EMIT_ATOM 508 509 I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__, 510 ((uintptr_t)i915->batch->ptr - save_ptr) / 4, batch_space); 511 assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space); 512 513 i915->hardware_dirty = 0; 514 i915->immediate_dirty = 0; 515 i915->dynamic_dirty = 0; 516 i915->static_dirty = 0; 517 i915->flush_dirty = 0; 518} 519