1/* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * Copyright 2010 Tom Stellard <tstellar@gmail.com> 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 */ 28 29#include "util/u_math.h" 30 31#include "radeon_dataflow.h" 32 33#include "radeon_compiler.h" 34#include "radeon_compiler_util.h" 35#include "radeon_list.h" 36#include "radeon_swizzle.h" 37#include "radeon_variable.h" 38 39struct src_clobbered_reads_cb_data { 40 rc_register_file File; 41 unsigned int Index; 42 unsigned int Mask; 43 struct rc_reader_data * ReaderData; 44}; 45 46typedef void (*rc_presub_replace_fn)(struct rc_instruction *, 47 struct rc_instruction *, 48 unsigned int); 49 50static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 51{ 52 struct rc_src_register combine; 53 combine.File = inner.File; 54 combine.Index = inner.Index; 55 combine.RelAddr = inner.RelAddr; 56 if (outer.Abs) { 57 combine.Abs = 1; 58 combine.Negate = outer.Negate; 59 } else { 60 combine.Abs = inner.Abs; 61 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 62 combine.Negate ^= outer.Negate; 63 } 64 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 65 return combine; 66} 67 68static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 69 struct rc_src_register * src) 70{ 71 rc_register_file file = src->File; 72 struct rc_reader_data * reader_data = data; 73 74 if(!rc_inst_can_use_presub(inst, 75 reader_data->Writer->U.I.PreSub.Opcode, 76 rc_swizzle_to_writemask(src->Swizzle), 77 src, 78 &reader_data->Writer->U.I.PreSub.SrcReg[0], 79 &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 80 reader_data->Abort = 1; 81 return; 82 } 83 84 /* XXX This could probably be handled better. */ 85 if (file == RC_FILE_ADDRESS) { 86 reader_data->Abort = 1; 87 return; 88 } 89 90 /* These instructions cannot read from the constants file. 91 * see radeonTransformTEX() 92 */ 93 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 94 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 95 (inst->U.I.Opcode == RC_OPCODE_TEX || 96 inst->U.I.Opcode == RC_OPCODE_TXB || 97 inst->U.I.Opcode == RC_OPCODE_TXP || 98 inst->U.I.Opcode == RC_OPCODE_TXD || 99 inst->U.I.Opcode == RC_OPCODE_TXL || 100 inst->U.I.Opcode == RC_OPCODE_KIL)){ 101 reader_data->Abort = 1; 102 return; 103 } 104} 105 106static void src_clobbered_reads_cb( 107 void * data, 108 struct rc_instruction * inst, 109 struct rc_src_register * src) 110{ 111 struct src_clobbered_reads_cb_data * sc_data = data; 112 113 if (src->File == sc_data->File 114 && src->Index == sc_data->Index 115 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 116 117 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 118 } 119 120 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 121 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 122 } 123} 124 125static void is_src_clobbered_scan_write( 126 void * data, 127 struct rc_instruction * inst, 128 rc_register_file file, 129 unsigned int index, 130 unsigned int mask) 131{ 132 struct src_clobbered_reads_cb_data sc_data; 133 struct rc_reader_data * reader_data = data; 134 sc_data.File = file; 135 sc_data.Index = index; 136 sc_data.Mask = mask; 137 sc_data.ReaderData = reader_data; 138 rc_for_all_reads_src(reader_data->Writer, 139 src_clobbered_reads_cb, &sc_data); 140} 141 142static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 143{ 144 struct rc_reader_data reader_data; 145 unsigned int i; 146 147 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 148 inst_mov->U.I.WriteALUResult) 149 return; 150 151 /* Get a list of all the readers of this MOV instruction. */ 152 reader_data.ExitOnAbort = 1; 153 rc_get_readers(c, inst_mov, &reader_data, 154 copy_propagate_scan_read, NULL, 155 is_src_clobbered_scan_write); 156 157 if (reader_data.Abort || reader_data.ReaderCount == 0 || reader_data.ReadersAfterEndloop) 158 return; 159 160 /* We can propagate SaturateMode if all the readers are MOV instructions 161 * without a presubtract operation, source negation and absolute. 162 * In that case, we just move SaturateMode to all readers. */ 163 if (inst_mov->U.I.SaturateMode) { 164 for (i = 0; i < reader_data.ReaderCount; i++) { 165 struct rc_instruction * inst = reader_data.Readers[i].Inst; 166 167 if (inst->U.I.Opcode != RC_OPCODE_MOV || 168 inst->U.I.SrcReg[0].File == RC_FILE_PRESUB || 169 inst->U.I.SrcReg[0].Abs || 170 inst->U.I.SrcReg[0].Negate) { 171 return; 172 } 173 } 174 } 175 176 /* Propagate the MOV instruction. */ 177 for (i = 0; i < reader_data.ReaderCount; i++) { 178 struct rc_instruction * inst = reader_data.Readers[i].Inst; 179 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 180 181 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 182 inst->U.I.PreSub = inst_mov->U.I.PreSub; 183 if (!inst->U.I.SaturateMode) 184 inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode; 185 } 186 187 /* Finally, remove the original MOV instruction */ 188 rc_remove_instruction(inst_mov); 189} 190 191/** 192 * Check if a source register is actually always the same 193 * swizzle constant. 194 */ 195static int is_src_uniform_constant(struct rc_src_register src, 196 rc_swizzle * pswz, unsigned int * pnegate) 197{ 198 int have_used = 0; 199 200 if (src.File != RC_FILE_NONE) { 201 *pswz = 0; 202 return 0; 203 } 204 205 for(unsigned int chan = 0; chan < 4; ++chan) { 206 unsigned int swz = GET_SWZ(src.Swizzle, chan); 207 if (swz < 4) { 208 *pswz = 0; 209 return 0; 210 } 211 if (swz == RC_SWIZZLE_UNUSED) 212 continue; 213 214 if (!have_used) { 215 *pswz = swz; 216 *pnegate = GET_BIT(src.Negate, chan); 217 have_used = 1; 218 } else { 219 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 220 *pswz = 0; 221 return 0; 222 } 223 } 224 } 225 226 return 1; 227} 228 229static void constant_folding_mad(struct rc_instruction * inst) 230{ 231 rc_swizzle swz = 0; 232 unsigned int negate= 0; 233 234 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 235 if (swz == RC_SWIZZLE_ZERO) { 236 inst->U.I.Opcode = RC_OPCODE_MUL; 237 return; 238 } 239 } 240 241 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 242 if (swz == RC_SWIZZLE_ONE) { 243 inst->U.I.Opcode = RC_OPCODE_ADD; 244 if (negate) 245 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 246 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 247 return; 248 } else if (swz == RC_SWIZZLE_ZERO) { 249 inst->U.I.Opcode = RC_OPCODE_MOV; 250 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 251 return; 252 } 253 } 254 255 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 256 if (swz == RC_SWIZZLE_ONE) { 257 inst->U.I.Opcode = RC_OPCODE_ADD; 258 if (negate) 259 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 260 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 261 return; 262 } else if (swz == RC_SWIZZLE_ZERO) { 263 inst->U.I.Opcode = RC_OPCODE_MOV; 264 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 265 return; 266 } 267 } 268} 269 270static void constant_folding_mul(struct rc_instruction * inst) 271{ 272 rc_swizzle swz = 0; 273 unsigned int negate = 0; 274 275 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 276 if (swz == RC_SWIZZLE_ONE) { 277 inst->U.I.Opcode = RC_OPCODE_MOV; 278 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 279 if (negate) 280 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 281 return; 282 } else if (swz == RC_SWIZZLE_ZERO) { 283 inst->U.I.Opcode = RC_OPCODE_MOV; 284 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 285 return; 286 } 287 } 288 289 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 290 if (swz == RC_SWIZZLE_ONE) { 291 inst->U.I.Opcode = RC_OPCODE_MOV; 292 if (negate) 293 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 294 return; 295 } else if (swz == RC_SWIZZLE_ZERO) { 296 inst->U.I.Opcode = RC_OPCODE_MOV; 297 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 298 return; 299 } 300 } 301} 302 303static void constant_folding_add(struct rc_instruction * inst) 304{ 305 rc_swizzle swz = 0; 306 unsigned int negate = 0; 307 308 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 309 if (swz == RC_SWIZZLE_ZERO) { 310 inst->U.I.Opcode = RC_OPCODE_MOV; 311 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 312 return; 313 } 314 } 315 316 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 317 if (swz == RC_SWIZZLE_ZERO) { 318 inst->U.I.Opcode = RC_OPCODE_MOV; 319 return; 320 } 321 } 322} 323 324/** 325 * Replace 0.0, 1.0 and 0.5 immediate constants by their 326 * respective swizzles. Simplify instructions like ADD dst, src, 0; 327 */ 328static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 329{ 330 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 331 unsigned int i; 332 333 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 334 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 335 struct rc_constant * constant; 336 struct rc_src_register newsrc; 337 int have_real_reference; 338 unsigned int chan; 339 340 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ 341 for (chan = 0; chan < 4; ++chan) 342 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 343 break; 344 if (chan == 4) { 345 inst->U.I.SrcReg[src].File = RC_FILE_NONE; 346 continue; 347 } 348 349 /* Convert immediates to swizzles. */ 350 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 351 inst->U.I.SrcReg[src].RelAddr || 352 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 353 continue; 354 355 constant = 356 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 357 358 if (constant->Type != RC_CONSTANT_IMMEDIATE) 359 continue; 360 361 newsrc = inst->U.I.SrcReg[src]; 362 have_real_reference = 0; 363 for (chan = 0; chan < 4; ++chan) { 364 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 365 unsigned int newswz; 366 float imm; 367 float baseimm; 368 369 if (swz >= 4) 370 continue; 371 372 imm = constant->u.Immediate[swz]; 373 baseimm = imm; 374 if (imm < 0.0) 375 baseimm = -baseimm; 376 377 if (baseimm == 0.0) { 378 newswz = RC_SWIZZLE_ZERO; 379 } else if (baseimm == 1.0) { 380 newswz = RC_SWIZZLE_ONE; 381 } else if (baseimm == 0.5 && c->has_half_swizzles) { 382 newswz = RC_SWIZZLE_HALF; 383 } else { 384 have_real_reference = 1; 385 continue; 386 } 387 388 SET_SWZ(newsrc.Swizzle, chan, newswz); 389 if (imm < 0.0 && !newsrc.Abs) 390 newsrc.Negate ^= 1 << chan; 391 } 392 393 if (!have_real_reference) { 394 newsrc.File = RC_FILE_NONE; 395 newsrc.Index = 0; 396 } 397 398 /* don't make the swizzle worse */ 399 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc)) 400 continue; 401 402 inst->U.I.SrcReg[src] = newsrc; 403 } 404 405 /* Simplify instructions based on constants */ 406 if (inst->U.I.Opcode == RC_OPCODE_MAD) 407 constant_folding_mad(inst); 408 409 /* note: MAD can simplify to MUL or ADD */ 410 if (inst->U.I.Opcode == RC_OPCODE_MUL) 411 constant_folding_mul(inst); 412 else if (inst->U.I.Opcode == RC_OPCODE_ADD) 413 constant_folding_add(inst); 414 415 /* In case this instruction has been converted, make sure all of the 416 * registers that are no longer used are empty. */ 417 opcode = rc_get_opcode_info(inst->U.I.Opcode); 418 for(i = opcode->NumSrcRegs; i < 3; i++) { 419 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 420 } 421} 422 423/** 424 * If src and dst use the same register, this function returns a writemask that 425 * indicates which components are read by src. Otherwise zero is returned. 426 */ 427static unsigned int src_reads_dst_mask(struct rc_src_register src, 428 struct rc_dst_register dst) 429{ 430 if (dst.File != src.File || dst.Index != src.Index) { 431 return 0; 432 } 433 return rc_swizzle_to_writemask(src.Swizzle); 434} 435 436/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 437 * in any of its channels. Return 0 otherwise. */ 438static int src_has_const_swz(struct rc_src_register src) { 439 int chan; 440 for(chan = 0; chan < 4; chan++) { 441 unsigned int swz = GET_SWZ(src.Swizzle, chan); 442 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 443 || swz == RC_SWIZZLE_ONE) { 444 return 1; 445 } 446 } 447 return 0; 448} 449 450static void presub_scan_read( 451 void * data, 452 struct rc_instruction * inst, 453 struct rc_src_register * src) 454{ 455 struct rc_reader_data * reader_data = data; 456 rc_presubtract_op * presub_opcode = reader_data->CbData; 457 458 if (!rc_inst_can_use_presub(inst, *presub_opcode, 459 reader_data->Writer->U.I.DstReg.WriteMask, 460 src, 461 &reader_data->Writer->U.I.SrcReg[0], 462 &reader_data->Writer->U.I.SrcReg[1])) { 463 reader_data->Abort = 1; 464 return; 465 } 466} 467 468static int presub_helper( 469 struct radeon_compiler * c, 470 struct rc_instruction * inst_add, 471 rc_presubtract_op presub_opcode, 472 rc_presub_replace_fn presub_replace) 473{ 474 struct rc_reader_data reader_data; 475 unsigned int i; 476 rc_presubtract_op cb_op = presub_opcode; 477 478 reader_data.CbData = &cb_op; 479 reader_data.ExitOnAbort = 1; 480 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 481 is_src_clobbered_scan_write); 482 483 if (reader_data.Abort || reader_data.ReaderCount == 0) 484 return 0; 485 486 for(i = 0; i < reader_data.ReaderCount; i++) { 487 unsigned int src_index; 488 struct rc_reader reader = reader_data.Readers[i]; 489 const struct rc_opcode_info * info = 490 rc_get_opcode_info(reader.Inst->U.I.Opcode); 491 492 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 493 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 494 presub_replace(inst_add, reader.Inst, src_index); 495 } 496 } 497 return 1; 498} 499 500static void presub_replace_add( 501 struct rc_instruction * inst_add, 502 struct rc_instruction * inst_reader, 503 unsigned int src_index) 504{ 505 rc_presubtract_op presub_opcode; 506 507 /* This function assumes that inst_add->U.I.SrcReg[0] and 508 * inst_add->U.I.SrcReg[1] aren't both negative. 509 */ 510 assert(!(inst_add->U.I.SrcReg[1].Negate && inst_add->U.I.SrcReg[0].Negate)); 511 512 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 513 presub_opcode = RC_PRESUB_SUB; 514 else 515 presub_opcode = RC_PRESUB_ADD; 516 517 if (inst_add->U.I.SrcReg[1].Negate) { 518 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 519 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 520 } else { 521 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 522 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 523 } 524 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 525 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 526 inst_reader->U.I.PreSub.Opcode = presub_opcode; 527 inst_reader->U.I.SrcReg[src_index] = 528 chain_srcregs(inst_reader->U.I.SrcReg[src_index], 529 inst_reader->U.I.PreSub.SrcReg[0]); 530 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 531 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 532} 533 534static int is_presub_candidate( 535 struct radeon_compiler * c, 536 struct rc_instruction * inst) 537{ 538 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 539 unsigned int i; 540 unsigned int is_constant[2] = {0, 0}; 541 542 assert(inst->U.I.Opcode == RC_OPCODE_ADD); 543 544 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 545 || inst->U.I.SaturateMode 546 || inst->U.I.WriteALUResult 547 || inst->U.I.Omod) { 548 return 0; 549 } 550 551 /* If both sources use a constant swizzle, then we can't convert it to 552 * a presubtract operation. In fact for the ADD and SUB presubtract 553 * operations neither source can contain a constant swizzle. This 554 * specific case is checked in peephole_add_presub_add() when 555 * we make sure the swizzles for both sources are equal, so we 556 * don't need to worry about it here. */ 557 for (i = 0; i < 2; i++) { 558 int chan; 559 for (chan = 0; chan < 4; chan++) { 560 rc_swizzle swz = 561 get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 562 if (swz == RC_SWIZZLE_ONE 563 || swz == RC_SWIZZLE_ZERO 564 || swz == RC_SWIZZLE_HALF) { 565 is_constant[i] = 1; 566 } 567 } 568 } 569 if (is_constant[0] && is_constant[1]) 570 return 0; 571 572 for(i = 0; i < info->NumSrcRegs; i++) { 573 struct rc_src_register src = inst->U.I.SrcReg[i]; 574 if (src_reads_dst_mask(src, inst->U.I.DstReg)) 575 return 0; 576 577 src.File = RC_FILE_PRESUB; 578 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 579 return 0; 580 } 581 return 1; 582} 583 584static int peephole_add_presub_add( 585 struct radeon_compiler * c, 586 struct rc_instruction * inst_add) 587{ 588 unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 589 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 590 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 591 592 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 593 return 0; 594 595 /* src0 and src1 can't have absolute values */ 596 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 597 return 0; 598 599 /* presub_replace_add() assumes only one is negative */ 600 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 601 return 0; 602 603 /* if src0 is negative, at least all bits of dstmask have to be set */ 604 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 605 return 0; 606 607 /* if src1 is negative, at least all bits of dstmask have to be set */ 608 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 609 return 0; 610 611 if (!is_presub_candidate(c, inst_add)) 612 return 0; 613 614 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 615 rc_remove_instruction(inst_add); 616 return 1; 617 } 618 return 0; 619} 620 621static void presub_replace_inv( 622 struct rc_instruction * inst_add, 623 struct rc_instruction * inst_reader, 624 unsigned int src_index) 625{ 626 /* We must be careful not to modify inst_add, since it 627 * is possible it will remain part of the program.*/ 628 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 629 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 630 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 631 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 632 inst_reader->U.I.PreSub.SrcReg[0]); 633 634 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 635 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 636} 637 638/** 639 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 640 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 641 * of the add instruction must have the constatnt 1 swizzle. This function 642 * does not check const registers to see if their value is 1.0, so it should 643 * be called after the constant_folding optimization. 644 * @return 645 * 0 if the ADD instruction is still part of the program. 646 * 1 if the ADD instruction is no longer part of the program. 647 */ 648static int peephole_add_presub_inv( 649 struct radeon_compiler * c, 650 struct rc_instruction * inst_add) 651{ 652 unsigned int i, swz; 653 654 if (!is_presub_candidate(c, inst_add)) 655 return 0; 656 657 /* Check if src0 is 1. */ 658 /* XXX It would be nice to use is_src_uniform_constant here, but that 659 * function only works if the register's file is RC_FILE_NONE */ 660 for(i = 0; i < 4; i++ ) { 661 if (!(inst_add->U.I.DstReg.WriteMask & (1 << i))) 662 continue; 663 664 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 665 if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i)) 666 return 0; 667 } 668 669 /* Check src1. */ 670 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 671 inst_add->U.I.DstReg.WriteMask 672 || inst_add->U.I.SrcReg[1].Abs 673 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 674 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 675 || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 676 677 return 0; 678 } 679 680 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 681 rc_remove_instruction(inst_add); 682 return 1; 683 } 684 return 0; 685} 686 687struct peephole_mul_cb_data { 688 struct rc_dst_register * Writer; 689 unsigned int Clobbered; 690}; 691 692static void omod_filter_reader_cb( 693 void * userdata, 694 struct rc_instruction * inst, 695 rc_register_file file, 696 unsigned int index, 697 unsigned int mask) 698{ 699 struct peephole_mul_cb_data * d = userdata; 700 if (rc_src_reads_dst_mask(file, mask, index, 701 d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { 702 703 d->Clobbered = 1; 704 } 705} 706 707static void omod_filter_writer_cb( 708 void * userdata, 709 struct rc_instruction * inst, 710 rc_register_file file, 711 unsigned int index, 712 unsigned int mask) 713{ 714 struct peephole_mul_cb_data * d = userdata; 715 if (file == d->Writer->File && index == d->Writer->Index && 716 (mask & d->Writer->WriteMask)) { 717 d->Clobbered = 1; 718 } 719} 720 721static int peephole_mul_omod( 722 struct radeon_compiler * c, 723 struct rc_instruction * inst_mul, 724 struct rc_list * var_list) 725{ 726 unsigned int chan = 0, swz, i; 727 int const_index = -1; 728 int temp_index = -1; 729 float const_value; 730 rc_omod_op omod_op = RC_OMOD_DISABLE; 731 struct rc_list * writer_list; 732 struct rc_variable * var; 733 struct peephole_mul_cb_data cb_data; 734 unsigned writemask_sum; 735 736 for (i = 0; i < 2; i++) { 737 unsigned int j; 738 if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT 739 && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { 740 return 0; 741 } 742 if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { 743 if (temp_index != -1) { 744 /* The instruction has two temp sources */ 745 return 0; 746 } else { 747 temp_index = i; 748 continue; 749 } 750 } 751 /* If we get this far Src[i] must be a constant src */ 752 if (inst_mul->U.I.SrcReg[i].Negate) { 753 return 0; 754 } 755 /* The constant src needs to read from the same swizzle */ 756 swz = RC_SWIZZLE_UNUSED; 757 chan = 0; 758 for (j = 0; j < 4; j++) { 759 unsigned int j_swz = 760 GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); 761 if (j_swz == RC_SWIZZLE_UNUSED) { 762 continue; 763 } 764 if (swz == RC_SWIZZLE_UNUSED) { 765 swz = j_swz; 766 chan = j; 767 } else if (j_swz != swz) { 768 return 0; 769 } 770 } 771 772 if (const_index != -1) { 773 /* The instruction has two constant sources */ 774 return 0; 775 } else { 776 const_index = i; 777 } 778 } 779 780 if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, 781 inst_mul->U.I.SrcReg[const_index].Index)) { 782 return 0; 783 } 784 const_value = rc_get_constant_value(c, 785 inst_mul->U.I.SrcReg[const_index].Index, 786 inst_mul->U.I.SrcReg[const_index].Swizzle, 787 inst_mul->U.I.SrcReg[const_index].Negate, 788 chan); 789 790 if (const_value == 2.0f) { 791 omod_op = RC_OMOD_MUL_2; 792 } else if (const_value == 4.0f) { 793 omod_op = RC_OMOD_MUL_4; 794 } else if (const_value == 8.0f) { 795 omod_op = RC_OMOD_MUL_8; 796 } else if (const_value == (1.0f / 2.0f)) { 797 omod_op = RC_OMOD_DIV_2; 798 } else if (const_value == (1.0f / 4.0f)) { 799 omod_op = RC_OMOD_DIV_4; 800 } else if (const_value == (1.0f / 8.0f)) { 801 omod_op = RC_OMOD_DIV_8; 802 } else { 803 return 0; 804 } 805 806 writer_list = rc_variable_list_get_writers_one_reader(var_list, 807 RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); 808 809 if (!writer_list) { 810 return 0; 811 } 812 813 cb_data.Clobbered = 0; 814 cb_data.Writer = &inst_mul->U.I.DstReg; 815 for (var = writer_list->Item; var; var = var->Friend) { 816 struct rc_instruction * inst; 817 const struct rc_opcode_info * info = rc_get_opcode_info( 818 var->Inst->U.I.Opcode); 819 if (info->HasTexture) { 820 return 0; 821 } 822 if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { 823 return 0; 824 } 825 for (inst = inst_mul->Prev; inst != var->Inst; 826 inst = inst->Prev) { 827 rc_for_all_reads_mask(inst, omod_filter_reader_cb, 828 &cb_data); 829 rc_for_all_writes_mask(inst, omod_filter_writer_cb, 830 &cb_data); 831 if (cb_data.Clobbered) { 832 break; 833 } 834 } 835 } 836 837 if (cb_data.Clobbered) { 838 return 0; 839 } 840 841 writemask_sum = rc_variable_writemask_sum(writer_list->Item); 842 843 /* rc_normal_rewrite_writemask can't expand a previous writemask to store 844 * more channels replicated. 845 */ 846 if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask)) 847 return 0; 848 849 /* Rewrite the instructions */ 850 for (var = writer_list->Item; var; var = var->Friend) { 851 struct rc_variable * writer = var; 852 unsigned conversion_swizzle = rc_make_conversion_swizzle( 853 writemask_sum, 854 inst_mul->U.I.DstReg.WriteMask); 855 writer->Inst->U.I.Omod = omod_op; 856 writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; 857 writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; 858 rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); 859 writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; 860 } 861 862 rc_remove_instruction(inst_mul); 863 864 return 1; 865} 866 867/** 868 * @return 869 * 0 if inst is still part of the program. 870 * 1 if inst is no longer part of the program. 871 */ 872static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 873{ 874 switch(inst->U.I.Opcode){ 875 case RC_OPCODE_ADD: 876 if (c->has_presub) { 877 if(peephole_add_presub_inv(c, inst)) 878 return 1; 879 if(peephole_add_presub_add(c, inst)) 880 return 1; 881 } 882 break; 883 default: 884 break; 885 } 886 return 0; 887} 888 889static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) { 890 unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); 891 for (unsigned int chan = 0; chan < 4; chan++) { 892 unsigned int swz = GET_SWZ(swz1, chan); 893 if (swz != RC_SWIZZLE_UNUSED) { 894 SET_SWZ(new_swz, chan, swz); 895 continue; 896 } 897 swz = GET_SWZ(swz2, chan); 898 SET_SWZ(new_swz, chan, swz); 899 } 900 return new_swz; 901} 902 903static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst) 904{ 905 unsigned int orig_dst_reg = inst->U.I.DstReg.Index; 906 unsigned int orig_dst_file = inst->U.I.DstReg.File; 907 unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask; 908 unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index; 909 unsigned int orig_src_file = inst->U.I.SrcReg[0].File; 910 911 struct rc_instruction * cur = inst; 912 while (cur!= &c->Program.Instructions) { 913 cur = cur->Next; 914 const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode); 915 916 /* Keep it simple for now and stop when encountering any 917 * control flow. 918 */ 919 if (opcode->IsFlowControl) 920 return 0; 921 922 /* Stop when the original destination is overwritten */ 923 if (orig_dst_reg == cur->U.I.DstReg.Index && 924 orig_dst_file == cur->U.I.DstReg.File && 925 (orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0) 926 return 0; 927 928 /* Stop the search when the original instruction destination 929 * is used as a source for anything. 930 */ 931 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { 932 if (cur->U.I.SrcReg[i].File == orig_dst_file && 933 cur->U.I.SrcReg[i].Index == orig_dst_reg) 934 return 0; 935 } 936 937 if (cur->U.I.Opcode == RC_OPCODE_MOV && 938 cur->U.I.DstReg.File == orig_dst_file && 939 cur->U.I.DstReg.Index == orig_dst_reg && 940 (cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) { 941 942 /* We can merge the movs if one of them is from inline constant */ 943 if (cur->U.I.SrcReg[0].File == RC_FILE_NONE || 944 orig_src_file == RC_FILE_NONE) { 945 cur->U.I.DstReg.WriteMask |= orig_dst_wmask; 946 947 if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) { 948 cur->U.I.SrcReg[0].File = orig_src_file; 949 cur->U.I.SrcReg[0].Index = orig_src_reg; 950 cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs; 951 cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr; 952 } 953 cur->U.I.SrcReg[0].Swizzle = 954 merge_swizzles(cur->U.I.SrcReg[0].Swizzle, 955 inst->U.I.SrcReg[0].Swizzle); 956 957 cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate; 958 959 /* finally delete the original mov */ 960 rc_remove_instruction(inst); 961 962 return 1; 963 } 964 } 965 } 966 return 0; 967} 968 969void rc_optimize(struct radeon_compiler * c, void *user) 970{ 971 struct rc_instruction * inst = c->Program.Instructions.Next; 972 while(inst != &c->Program.Instructions) { 973 struct rc_instruction * cur = inst; 974 inst = inst->Next; 975 976 constant_folding(c, cur); 977 978 if(peephole(c, cur)) 979 continue; 980 981 if (cur->U.I.Opcode == RC_OPCODE_MOV) { 982 if (c->is_r500) { 983 if (merge_movs(c, cur)) 984 continue; 985 } 986 copy_propagate(c, cur); 987 /* cur may no longer be part of the program */ 988 } 989 } 990 991 if (!c->has_omod) { 992 return; 993 } 994 995 inst = c->Program.Instructions.Next; 996 struct rc_list * var_list = NULL; 997 while(inst != &c->Program.Instructions) { 998 struct rc_instruction * cur = inst; 999 inst = inst->Next; 1000 if (cur->U.I.Opcode == RC_OPCODE_MUL) { 1001 if (!var_list) 1002 var_list = rc_get_variables(c); 1003 if (peephole_mul_omod(c, cur, var_list)) 1004 var_list = NULL; 1005 } 1006 } 1007} 1008