1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2009 Nicolai Haehnle. 3bf215546Sopenharmony_ci * Copyright 2010 Tom Stellard <tstellar@gmail.com> 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * All Rights Reserved. 6bf215546Sopenharmony_ci * 7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining 8bf215546Sopenharmony_ci * a copy of this software and associated documentation files (the 9bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 10bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 11bf215546Sopenharmony_ci * distribute, sublicense, and/or sell copies of the Software, and to 12bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 13bf215546Sopenharmony_ci * the following conditions: 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 16bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial 17bf215546Sopenharmony_ci * portions of the Software. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20bf215546Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22bf215546Sopenharmony_ci * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23bf215546Sopenharmony_ci * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24bf215546Sopenharmony_ci * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25bf215546Sopenharmony_ci * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci */ 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "util/u_math.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "radeon_dataflow.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#include "radeon_compiler.h" 34bf215546Sopenharmony_ci#include "radeon_compiler_util.h" 35bf215546Sopenharmony_ci#include "radeon_list.h" 36bf215546Sopenharmony_ci#include "radeon_swizzle.h" 37bf215546Sopenharmony_ci#include "radeon_variable.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cistruct src_clobbered_reads_cb_data { 40bf215546Sopenharmony_ci rc_register_file File; 41bf215546Sopenharmony_ci unsigned int Index; 42bf215546Sopenharmony_ci unsigned int Mask; 43bf215546Sopenharmony_ci struct rc_reader_data * ReaderData; 44bf215546Sopenharmony_ci}; 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_citypedef void (*rc_presub_replace_fn)(struct rc_instruction *, 47bf215546Sopenharmony_ci struct rc_instruction *, 48bf215546Sopenharmony_ci unsigned int); 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cistatic struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 51bf215546Sopenharmony_ci{ 52bf215546Sopenharmony_ci struct rc_src_register combine; 53bf215546Sopenharmony_ci combine.File = inner.File; 54bf215546Sopenharmony_ci combine.Index = inner.Index; 55bf215546Sopenharmony_ci combine.RelAddr = inner.RelAddr; 56bf215546Sopenharmony_ci if (outer.Abs) { 57bf215546Sopenharmony_ci combine.Abs = 1; 58bf215546Sopenharmony_ci combine.Negate = outer.Negate; 59bf215546Sopenharmony_ci } else { 60bf215546Sopenharmony_ci combine.Abs = inner.Abs; 61bf215546Sopenharmony_ci combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 62bf215546Sopenharmony_ci combine.Negate ^= outer.Negate; 63bf215546Sopenharmony_ci } 64bf215546Sopenharmony_ci combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 65bf215546Sopenharmony_ci return combine; 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_cistatic void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 69bf215546Sopenharmony_ci struct rc_src_register * src) 70bf215546Sopenharmony_ci{ 71bf215546Sopenharmony_ci rc_register_file file = src->File; 72bf215546Sopenharmony_ci struct rc_reader_data * reader_data = data; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci if(!rc_inst_can_use_presub(inst, 75bf215546Sopenharmony_ci reader_data->Writer->U.I.PreSub.Opcode, 76bf215546Sopenharmony_ci rc_swizzle_to_writemask(src->Swizzle), 77bf215546Sopenharmony_ci src, 78bf215546Sopenharmony_ci &reader_data->Writer->U.I.PreSub.SrcReg[0], 79bf215546Sopenharmony_ci &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 80bf215546Sopenharmony_ci reader_data->Abort = 1; 81bf215546Sopenharmony_ci return; 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci /* XXX This could probably be handled better. */ 85bf215546Sopenharmony_ci if (file == RC_FILE_ADDRESS) { 86bf215546Sopenharmony_ci reader_data->Abort = 1; 87bf215546Sopenharmony_ci return; 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci /* These instructions cannot read from the constants file. 91bf215546Sopenharmony_ci * see radeonTransformTEX() 92bf215546Sopenharmony_ci */ 93bf215546Sopenharmony_ci if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 94bf215546Sopenharmony_ci reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 95bf215546Sopenharmony_ci (inst->U.I.Opcode == RC_OPCODE_TEX || 96bf215546Sopenharmony_ci inst->U.I.Opcode == RC_OPCODE_TXB || 97bf215546Sopenharmony_ci inst->U.I.Opcode == RC_OPCODE_TXP || 98bf215546Sopenharmony_ci inst->U.I.Opcode == RC_OPCODE_TXD || 99bf215546Sopenharmony_ci inst->U.I.Opcode == RC_OPCODE_TXL || 100bf215546Sopenharmony_ci inst->U.I.Opcode == RC_OPCODE_KIL)){ 101bf215546Sopenharmony_ci reader_data->Abort = 1; 102bf215546Sopenharmony_ci return; 103bf215546Sopenharmony_ci } 104bf215546Sopenharmony_ci} 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_cistatic void src_clobbered_reads_cb( 107bf215546Sopenharmony_ci void * data, 108bf215546Sopenharmony_ci struct rc_instruction * inst, 109bf215546Sopenharmony_ci struct rc_src_register * src) 110bf215546Sopenharmony_ci{ 111bf215546Sopenharmony_ci struct src_clobbered_reads_cb_data * sc_data = data; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci if (src->File == sc_data->File 114bf215546Sopenharmony_ci && src->Index == sc_data->Index 115bf215546Sopenharmony_ci && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 121bf215546Sopenharmony_ci sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 122bf215546Sopenharmony_ci } 123bf215546Sopenharmony_ci} 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_cistatic void is_src_clobbered_scan_write( 126bf215546Sopenharmony_ci void * data, 127bf215546Sopenharmony_ci struct rc_instruction * inst, 128bf215546Sopenharmony_ci rc_register_file file, 129bf215546Sopenharmony_ci unsigned int index, 130bf215546Sopenharmony_ci unsigned int mask) 131bf215546Sopenharmony_ci{ 132bf215546Sopenharmony_ci struct src_clobbered_reads_cb_data sc_data; 133bf215546Sopenharmony_ci struct rc_reader_data * reader_data = data; 134bf215546Sopenharmony_ci sc_data.File = file; 135bf215546Sopenharmony_ci sc_data.Index = index; 136bf215546Sopenharmony_ci sc_data.Mask = mask; 137bf215546Sopenharmony_ci sc_data.ReaderData = reader_data; 138bf215546Sopenharmony_ci rc_for_all_reads_src(reader_data->Writer, 139bf215546Sopenharmony_ci src_clobbered_reads_cb, &sc_data); 140bf215546Sopenharmony_ci} 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_cistatic void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 143bf215546Sopenharmony_ci{ 144bf215546Sopenharmony_ci struct rc_reader_data reader_data; 145bf215546Sopenharmony_ci unsigned int i; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 148bf215546Sopenharmony_ci inst_mov->U.I.WriteALUResult) 149bf215546Sopenharmony_ci return; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci /* Get a list of all the readers of this MOV instruction. */ 152bf215546Sopenharmony_ci reader_data.ExitOnAbort = 1; 153bf215546Sopenharmony_ci rc_get_readers(c, inst_mov, &reader_data, 154bf215546Sopenharmony_ci copy_propagate_scan_read, NULL, 155bf215546Sopenharmony_ci is_src_clobbered_scan_write); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci if (reader_data.Abort || reader_data.ReaderCount == 0 || reader_data.ReadersAfterEndloop) 158bf215546Sopenharmony_ci return; 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci /* We can propagate SaturateMode if all the readers are MOV instructions 161bf215546Sopenharmony_ci * without a presubtract operation, source negation and absolute. 162bf215546Sopenharmony_ci * In that case, we just move SaturateMode to all readers. */ 163bf215546Sopenharmony_ci if (inst_mov->U.I.SaturateMode) { 164bf215546Sopenharmony_ci for (i = 0; i < reader_data.ReaderCount; i++) { 165bf215546Sopenharmony_ci struct rc_instruction * inst = reader_data.Readers[i].Inst; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci if (inst->U.I.Opcode != RC_OPCODE_MOV || 168bf215546Sopenharmony_ci inst->U.I.SrcReg[0].File == RC_FILE_PRESUB || 169bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Abs || 170bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Negate) { 171bf215546Sopenharmony_ci return; 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci } 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci /* Propagate the MOV instruction. */ 177bf215546Sopenharmony_ci for (i = 0; i < reader_data.ReaderCount; i++) { 178bf215546Sopenharmony_ci struct rc_instruction * inst = reader_data.Readers[i].Inst; 179bf215546Sopenharmony_ci *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 182bf215546Sopenharmony_ci inst->U.I.PreSub = inst_mov->U.I.PreSub; 183bf215546Sopenharmony_ci if (!inst->U.I.SaturateMode) 184bf215546Sopenharmony_ci inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode; 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci /* Finally, remove the original MOV instruction */ 188bf215546Sopenharmony_ci rc_remove_instruction(inst_mov); 189bf215546Sopenharmony_ci} 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci/** 192bf215546Sopenharmony_ci * Check if a source register is actually always the same 193bf215546Sopenharmony_ci * swizzle constant. 194bf215546Sopenharmony_ci */ 195bf215546Sopenharmony_cistatic int is_src_uniform_constant(struct rc_src_register src, 196bf215546Sopenharmony_ci rc_swizzle * pswz, unsigned int * pnegate) 197bf215546Sopenharmony_ci{ 198bf215546Sopenharmony_ci int have_used = 0; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci if (src.File != RC_FILE_NONE) { 201bf215546Sopenharmony_ci *pswz = 0; 202bf215546Sopenharmony_ci return 0; 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci for(unsigned int chan = 0; chan < 4; ++chan) { 206bf215546Sopenharmony_ci unsigned int swz = GET_SWZ(src.Swizzle, chan); 207bf215546Sopenharmony_ci if (swz < 4) { 208bf215546Sopenharmony_ci *pswz = 0; 209bf215546Sopenharmony_ci return 0; 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_UNUSED) 212bf215546Sopenharmony_ci continue; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci if (!have_used) { 215bf215546Sopenharmony_ci *pswz = swz; 216bf215546Sopenharmony_ci *pnegate = GET_BIT(src.Negate, chan); 217bf215546Sopenharmony_ci have_used = 1; 218bf215546Sopenharmony_ci } else { 219bf215546Sopenharmony_ci if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 220bf215546Sopenharmony_ci *pswz = 0; 221bf215546Sopenharmony_ci return 0; 222bf215546Sopenharmony_ci } 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci } 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci return 1; 227bf215546Sopenharmony_ci} 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_cistatic void constant_folding_mad(struct rc_instruction * inst) 230bf215546Sopenharmony_ci{ 231bf215546Sopenharmony_ci rc_swizzle swz = 0; 232bf215546Sopenharmony_ci unsigned int negate= 0; 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 235bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ZERO) { 236bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MUL; 237bf215546Sopenharmony_ci return; 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 242bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ONE) { 243bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_ADD; 244bf215546Sopenharmony_ci if (negate) 245bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 246bf215546Sopenharmony_ci inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 247bf215546Sopenharmony_ci return; 248bf215546Sopenharmony_ci } else if (swz == RC_SWIZZLE_ZERO) { 249bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 250bf215546Sopenharmony_ci inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 251bf215546Sopenharmony_ci return; 252bf215546Sopenharmony_ci } 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 256bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ONE) { 257bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_ADD; 258bf215546Sopenharmony_ci if (negate) 259bf215546Sopenharmony_ci inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 260bf215546Sopenharmony_ci inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 261bf215546Sopenharmony_ci return; 262bf215546Sopenharmony_ci } else if (swz == RC_SWIZZLE_ZERO) { 263bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 264bf215546Sopenharmony_ci inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 265bf215546Sopenharmony_ci return; 266bf215546Sopenharmony_ci } 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci} 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_cistatic void constant_folding_mul(struct rc_instruction * inst) 271bf215546Sopenharmony_ci{ 272bf215546Sopenharmony_ci rc_swizzle swz = 0; 273bf215546Sopenharmony_ci unsigned int negate = 0; 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 276bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ONE) { 277bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 278bf215546Sopenharmony_ci inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 279bf215546Sopenharmony_ci if (negate) 280bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 281bf215546Sopenharmony_ci return; 282bf215546Sopenharmony_ci } else if (swz == RC_SWIZZLE_ZERO) { 283bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 284bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 285bf215546Sopenharmony_ci return; 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci } 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 290bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ONE) { 291bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 292bf215546Sopenharmony_ci if (negate) 293bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 294bf215546Sopenharmony_ci return; 295bf215546Sopenharmony_ci } else if (swz == RC_SWIZZLE_ZERO) { 296bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 297bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 298bf215546Sopenharmony_ci return; 299bf215546Sopenharmony_ci } 300bf215546Sopenharmony_ci } 301bf215546Sopenharmony_ci} 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_cistatic void constant_folding_add(struct rc_instruction * inst) 304bf215546Sopenharmony_ci{ 305bf215546Sopenharmony_ci rc_swizzle swz = 0; 306bf215546Sopenharmony_ci unsigned int negate = 0; 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 309bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ZERO) { 310bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 311bf215546Sopenharmony_ci inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 312bf215546Sopenharmony_ci return; 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci } 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 317bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ZERO) { 318bf215546Sopenharmony_ci inst->U.I.Opcode = RC_OPCODE_MOV; 319bf215546Sopenharmony_ci return; 320bf215546Sopenharmony_ci } 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci/** 325bf215546Sopenharmony_ci * Replace 0.0, 1.0 and 0.5 immediate constants by their 326bf215546Sopenharmony_ci * respective swizzles. Simplify instructions like ADD dst, src, 0; 327bf215546Sopenharmony_ci */ 328bf215546Sopenharmony_cistatic void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 329bf215546Sopenharmony_ci{ 330bf215546Sopenharmony_ci const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 331bf215546Sopenharmony_ci unsigned int i; 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 334bf215546Sopenharmony_ci for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 335bf215546Sopenharmony_ci struct rc_constant * constant; 336bf215546Sopenharmony_ci struct rc_src_register newsrc; 337bf215546Sopenharmony_ci int have_real_reference; 338bf215546Sopenharmony_ci unsigned int chan; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ 341bf215546Sopenharmony_ci for (chan = 0; chan < 4; ++chan) 342bf215546Sopenharmony_ci if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 343bf215546Sopenharmony_ci break; 344bf215546Sopenharmony_ci if (chan == 4) { 345bf215546Sopenharmony_ci inst->U.I.SrcReg[src].File = RC_FILE_NONE; 346bf215546Sopenharmony_ci continue; 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci /* Convert immediates to swizzles. */ 350bf215546Sopenharmony_ci if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 351bf215546Sopenharmony_ci inst->U.I.SrcReg[src].RelAddr || 352bf215546Sopenharmony_ci inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 353bf215546Sopenharmony_ci continue; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci constant = 356bf215546Sopenharmony_ci &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci if (constant->Type != RC_CONSTANT_IMMEDIATE) 359bf215546Sopenharmony_ci continue; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci newsrc = inst->U.I.SrcReg[src]; 362bf215546Sopenharmony_ci have_real_reference = 0; 363bf215546Sopenharmony_ci for (chan = 0; chan < 4; ++chan) { 364bf215546Sopenharmony_ci unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 365bf215546Sopenharmony_ci unsigned int newswz; 366bf215546Sopenharmony_ci float imm; 367bf215546Sopenharmony_ci float baseimm; 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci if (swz >= 4) 370bf215546Sopenharmony_ci continue; 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci imm = constant->u.Immediate[swz]; 373bf215546Sopenharmony_ci baseimm = imm; 374bf215546Sopenharmony_ci if (imm < 0.0) 375bf215546Sopenharmony_ci baseimm = -baseimm; 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci if (baseimm == 0.0) { 378bf215546Sopenharmony_ci newswz = RC_SWIZZLE_ZERO; 379bf215546Sopenharmony_ci } else if (baseimm == 1.0) { 380bf215546Sopenharmony_ci newswz = RC_SWIZZLE_ONE; 381bf215546Sopenharmony_ci } else if (baseimm == 0.5 && c->has_half_swizzles) { 382bf215546Sopenharmony_ci newswz = RC_SWIZZLE_HALF; 383bf215546Sopenharmony_ci } else { 384bf215546Sopenharmony_ci have_real_reference = 1; 385bf215546Sopenharmony_ci continue; 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci SET_SWZ(newsrc.Swizzle, chan, newswz); 389bf215546Sopenharmony_ci if (imm < 0.0 && !newsrc.Abs) 390bf215546Sopenharmony_ci newsrc.Negate ^= 1 << chan; 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci if (!have_real_reference) { 394bf215546Sopenharmony_ci newsrc.File = RC_FILE_NONE; 395bf215546Sopenharmony_ci newsrc.Index = 0; 396bf215546Sopenharmony_ci } 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci /* don't make the swizzle worse */ 399bf215546Sopenharmony_ci if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc)) 400bf215546Sopenharmony_ci continue; 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci inst->U.I.SrcReg[src] = newsrc; 403bf215546Sopenharmony_ci } 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci /* Simplify instructions based on constants */ 406bf215546Sopenharmony_ci if (inst->U.I.Opcode == RC_OPCODE_MAD) 407bf215546Sopenharmony_ci constant_folding_mad(inst); 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci /* note: MAD can simplify to MUL or ADD */ 410bf215546Sopenharmony_ci if (inst->U.I.Opcode == RC_OPCODE_MUL) 411bf215546Sopenharmony_ci constant_folding_mul(inst); 412bf215546Sopenharmony_ci else if (inst->U.I.Opcode == RC_OPCODE_ADD) 413bf215546Sopenharmony_ci constant_folding_add(inst); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci /* In case this instruction has been converted, make sure all of the 416bf215546Sopenharmony_ci * registers that are no longer used are empty. */ 417bf215546Sopenharmony_ci opcode = rc_get_opcode_info(inst->U.I.Opcode); 418bf215546Sopenharmony_ci for(i = opcode->NumSrcRegs; i < 3; i++) { 419bf215546Sopenharmony_ci memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 420bf215546Sopenharmony_ci } 421bf215546Sopenharmony_ci} 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci/** 424bf215546Sopenharmony_ci * If src and dst use the same register, this function returns a writemask that 425bf215546Sopenharmony_ci * indicates which components are read by src. Otherwise zero is returned. 426bf215546Sopenharmony_ci */ 427bf215546Sopenharmony_cistatic unsigned int src_reads_dst_mask(struct rc_src_register src, 428bf215546Sopenharmony_ci struct rc_dst_register dst) 429bf215546Sopenharmony_ci{ 430bf215546Sopenharmony_ci if (dst.File != src.File || dst.Index != src.Index) { 431bf215546Sopenharmony_ci return 0; 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci return rc_swizzle_to_writemask(src.Swizzle); 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 437bf215546Sopenharmony_ci * in any of its channels. Return 0 otherwise. */ 438bf215546Sopenharmony_cistatic int src_has_const_swz(struct rc_src_register src) { 439bf215546Sopenharmony_ci int chan; 440bf215546Sopenharmony_ci for(chan = 0; chan < 4; chan++) { 441bf215546Sopenharmony_ci unsigned int swz = GET_SWZ(src.Swizzle, chan); 442bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 443bf215546Sopenharmony_ci || swz == RC_SWIZZLE_ONE) { 444bf215546Sopenharmony_ci return 1; 445bf215546Sopenharmony_ci } 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci return 0; 448bf215546Sopenharmony_ci} 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_cistatic void presub_scan_read( 451bf215546Sopenharmony_ci void * data, 452bf215546Sopenharmony_ci struct rc_instruction * inst, 453bf215546Sopenharmony_ci struct rc_src_register * src) 454bf215546Sopenharmony_ci{ 455bf215546Sopenharmony_ci struct rc_reader_data * reader_data = data; 456bf215546Sopenharmony_ci rc_presubtract_op * presub_opcode = reader_data->CbData; 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci if (!rc_inst_can_use_presub(inst, *presub_opcode, 459bf215546Sopenharmony_ci reader_data->Writer->U.I.DstReg.WriteMask, 460bf215546Sopenharmony_ci src, 461bf215546Sopenharmony_ci &reader_data->Writer->U.I.SrcReg[0], 462bf215546Sopenharmony_ci &reader_data->Writer->U.I.SrcReg[1])) { 463bf215546Sopenharmony_ci reader_data->Abort = 1; 464bf215546Sopenharmony_ci return; 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci} 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_cistatic int presub_helper( 469bf215546Sopenharmony_ci struct radeon_compiler * c, 470bf215546Sopenharmony_ci struct rc_instruction * inst_add, 471bf215546Sopenharmony_ci rc_presubtract_op presub_opcode, 472bf215546Sopenharmony_ci rc_presub_replace_fn presub_replace) 473bf215546Sopenharmony_ci{ 474bf215546Sopenharmony_ci struct rc_reader_data reader_data; 475bf215546Sopenharmony_ci unsigned int i; 476bf215546Sopenharmony_ci rc_presubtract_op cb_op = presub_opcode; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci reader_data.CbData = &cb_op; 479bf215546Sopenharmony_ci reader_data.ExitOnAbort = 1; 480bf215546Sopenharmony_ci rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 481bf215546Sopenharmony_ci is_src_clobbered_scan_write); 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci if (reader_data.Abort || reader_data.ReaderCount == 0) 484bf215546Sopenharmony_ci return 0; 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci for(i = 0; i < reader_data.ReaderCount; i++) { 487bf215546Sopenharmony_ci unsigned int src_index; 488bf215546Sopenharmony_ci struct rc_reader reader = reader_data.Readers[i]; 489bf215546Sopenharmony_ci const struct rc_opcode_info * info = 490bf215546Sopenharmony_ci rc_get_opcode_info(reader.Inst->U.I.Opcode); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 493bf215546Sopenharmony_ci if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 494bf215546Sopenharmony_ci presub_replace(inst_add, reader.Inst, src_index); 495bf215546Sopenharmony_ci } 496bf215546Sopenharmony_ci } 497bf215546Sopenharmony_ci return 1; 498bf215546Sopenharmony_ci} 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_cistatic void presub_replace_add( 501bf215546Sopenharmony_ci struct rc_instruction * inst_add, 502bf215546Sopenharmony_ci struct rc_instruction * inst_reader, 503bf215546Sopenharmony_ci unsigned int src_index) 504bf215546Sopenharmony_ci{ 505bf215546Sopenharmony_ci rc_presubtract_op presub_opcode; 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci /* This function assumes that inst_add->U.I.SrcReg[0] and 508bf215546Sopenharmony_ci * inst_add->U.I.SrcReg[1] aren't both negative. 509bf215546Sopenharmony_ci */ 510bf215546Sopenharmony_ci assert(!(inst_add->U.I.SrcReg[1].Negate && inst_add->U.I.SrcReg[0].Negate)); 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 513bf215546Sopenharmony_ci presub_opcode = RC_PRESUB_SUB; 514bf215546Sopenharmony_ci else 515bf215546Sopenharmony_ci presub_opcode = RC_PRESUB_ADD; 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci if (inst_add->U.I.SrcReg[1].Negate) { 518bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 519bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 520bf215546Sopenharmony_ci } else { 521bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 522bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 523bf215546Sopenharmony_ci } 524bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 525bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 526bf215546Sopenharmony_ci inst_reader->U.I.PreSub.Opcode = presub_opcode; 527bf215546Sopenharmony_ci inst_reader->U.I.SrcReg[src_index] = 528bf215546Sopenharmony_ci chain_srcregs(inst_reader->U.I.SrcReg[src_index], 529bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[0]); 530bf215546Sopenharmony_ci inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 531bf215546Sopenharmony_ci inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 532bf215546Sopenharmony_ci} 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_cistatic int is_presub_candidate( 535bf215546Sopenharmony_ci struct radeon_compiler * c, 536bf215546Sopenharmony_ci struct rc_instruction * inst) 537bf215546Sopenharmony_ci{ 538bf215546Sopenharmony_ci const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 539bf215546Sopenharmony_ci unsigned int i; 540bf215546Sopenharmony_ci unsigned int is_constant[2] = {0, 0}; 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci assert(inst->U.I.Opcode == RC_OPCODE_ADD); 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 545bf215546Sopenharmony_ci || inst->U.I.SaturateMode 546bf215546Sopenharmony_ci || inst->U.I.WriteALUResult 547bf215546Sopenharmony_ci || inst->U.I.Omod) { 548bf215546Sopenharmony_ci return 0; 549bf215546Sopenharmony_ci } 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci /* If both sources use a constant swizzle, then we can't convert it to 552bf215546Sopenharmony_ci * a presubtract operation. In fact for the ADD and SUB presubtract 553bf215546Sopenharmony_ci * operations neither source can contain a constant swizzle. This 554bf215546Sopenharmony_ci * specific case is checked in peephole_add_presub_add() when 555bf215546Sopenharmony_ci * we make sure the swizzles for both sources are equal, so we 556bf215546Sopenharmony_ci * don't need to worry about it here. */ 557bf215546Sopenharmony_ci for (i = 0; i < 2; i++) { 558bf215546Sopenharmony_ci int chan; 559bf215546Sopenharmony_ci for (chan = 0; chan < 4; chan++) { 560bf215546Sopenharmony_ci rc_swizzle swz = 561bf215546Sopenharmony_ci get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 562bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_ONE 563bf215546Sopenharmony_ci || swz == RC_SWIZZLE_ZERO 564bf215546Sopenharmony_ci || swz == RC_SWIZZLE_HALF) { 565bf215546Sopenharmony_ci is_constant[i] = 1; 566bf215546Sopenharmony_ci } 567bf215546Sopenharmony_ci } 568bf215546Sopenharmony_ci } 569bf215546Sopenharmony_ci if (is_constant[0] && is_constant[1]) 570bf215546Sopenharmony_ci return 0; 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci for(i = 0; i < info->NumSrcRegs; i++) { 573bf215546Sopenharmony_ci struct rc_src_register src = inst->U.I.SrcReg[i]; 574bf215546Sopenharmony_ci if (src_reads_dst_mask(src, inst->U.I.DstReg)) 575bf215546Sopenharmony_ci return 0; 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci src.File = RC_FILE_PRESUB; 578bf215546Sopenharmony_ci if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 579bf215546Sopenharmony_ci return 0; 580bf215546Sopenharmony_ci } 581bf215546Sopenharmony_ci return 1; 582bf215546Sopenharmony_ci} 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_cistatic int peephole_add_presub_add( 585bf215546Sopenharmony_ci struct radeon_compiler * c, 586bf215546Sopenharmony_ci struct rc_instruction * inst_add) 587bf215546Sopenharmony_ci{ 588bf215546Sopenharmony_ci unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 589bf215546Sopenharmony_ci unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 590bf215546Sopenharmony_ci unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 593bf215546Sopenharmony_ci return 0; 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci /* src0 and src1 can't have absolute values */ 596bf215546Sopenharmony_ci if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 597bf215546Sopenharmony_ci return 0; 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci /* presub_replace_add() assumes only one is negative */ 600bf215546Sopenharmony_ci if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 601bf215546Sopenharmony_ci return 0; 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci /* if src0 is negative, at least all bits of dstmask have to be set */ 604bf215546Sopenharmony_ci if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 605bf215546Sopenharmony_ci return 0; 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci /* if src1 is negative, at least all bits of dstmask have to be set */ 608bf215546Sopenharmony_ci if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 609bf215546Sopenharmony_ci return 0; 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci if (!is_presub_candidate(c, inst_add)) 612bf215546Sopenharmony_ci return 0; 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 615bf215546Sopenharmony_ci rc_remove_instruction(inst_add); 616bf215546Sopenharmony_ci return 1; 617bf215546Sopenharmony_ci } 618bf215546Sopenharmony_ci return 0; 619bf215546Sopenharmony_ci} 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_cistatic void presub_replace_inv( 622bf215546Sopenharmony_ci struct rc_instruction * inst_add, 623bf215546Sopenharmony_ci struct rc_instruction * inst_reader, 624bf215546Sopenharmony_ci unsigned int src_index) 625bf215546Sopenharmony_ci{ 626bf215546Sopenharmony_ci /* We must be careful not to modify inst_add, since it 627bf215546Sopenharmony_ci * is possible it will remain part of the program.*/ 628bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 629bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 630bf215546Sopenharmony_ci inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 631bf215546Sopenharmony_ci inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 632bf215546Sopenharmony_ci inst_reader->U.I.PreSub.SrcReg[0]); 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 635bf215546Sopenharmony_ci inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 636bf215546Sopenharmony_ci} 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci/** 639bf215546Sopenharmony_ci * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 640bf215546Sopenharmony_ci * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 641bf215546Sopenharmony_ci * of the add instruction must have the constatnt 1 swizzle. This function 642bf215546Sopenharmony_ci * does not check const registers to see if their value is 1.0, so it should 643bf215546Sopenharmony_ci * be called after the constant_folding optimization. 644bf215546Sopenharmony_ci * @return 645bf215546Sopenharmony_ci * 0 if the ADD instruction is still part of the program. 646bf215546Sopenharmony_ci * 1 if the ADD instruction is no longer part of the program. 647bf215546Sopenharmony_ci */ 648bf215546Sopenharmony_cistatic int peephole_add_presub_inv( 649bf215546Sopenharmony_ci struct radeon_compiler * c, 650bf215546Sopenharmony_ci struct rc_instruction * inst_add) 651bf215546Sopenharmony_ci{ 652bf215546Sopenharmony_ci unsigned int i, swz; 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci if (!is_presub_candidate(c, inst_add)) 655bf215546Sopenharmony_ci return 0; 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci /* Check if src0 is 1. */ 658bf215546Sopenharmony_ci /* XXX It would be nice to use is_src_uniform_constant here, but that 659bf215546Sopenharmony_ci * function only works if the register's file is RC_FILE_NONE */ 660bf215546Sopenharmony_ci for(i = 0; i < 4; i++ ) { 661bf215546Sopenharmony_ci if (!(inst_add->U.I.DstReg.WriteMask & (1 << i))) 662bf215546Sopenharmony_ci continue; 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 665bf215546Sopenharmony_ci if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i)) 666bf215546Sopenharmony_ci return 0; 667bf215546Sopenharmony_ci } 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci /* Check src1. */ 670bf215546Sopenharmony_ci if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 671bf215546Sopenharmony_ci inst_add->U.I.DstReg.WriteMask 672bf215546Sopenharmony_ci || inst_add->U.I.SrcReg[1].Abs 673bf215546Sopenharmony_ci || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 674bf215546Sopenharmony_ci && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 675bf215546Sopenharmony_ci || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci return 0; 678bf215546Sopenharmony_ci } 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 681bf215546Sopenharmony_ci rc_remove_instruction(inst_add); 682bf215546Sopenharmony_ci return 1; 683bf215546Sopenharmony_ci } 684bf215546Sopenharmony_ci return 0; 685bf215546Sopenharmony_ci} 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_cistruct peephole_mul_cb_data { 688bf215546Sopenharmony_ci struct rc_dst_register * Writer; 689bf215546Sopenharmony_ci unsigned int Clobbered; 690bf215546Sopenharmony_ci}; 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_cistatic void omod_filter_reader_cb( 693bf215546Sopenharmony_ci void * userdata, 694bf215546Sopenharmony_ci struct rc_instruction * inst, 695bf215546Sopenharmony_ci rc_register_file file, 696bf215546Sopenharmony_ci unsigned int index, 697bf215546Sopenharmony_ci unsigned int mask) 698bf215546Sopenharmony_ci{ 699bf215546Sopenharmony_ci struct peephole_mul_cb_data * d = userdata; 700bf215546Sopenharmony_ci if (rc_src_reads_dst_mask(file, mask, index, 701bf215546Sopenharmony_ci d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci d->Clobbered = 1; 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci} 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_cistatic void omod_filter_writer_cb( 708bf215546Sopenharmony_ci void * userdata, 709bf215546Sopenharmony_ci struct rc_instruction * inst, 710bf215546Sopenharmony_ci rc_register_file file, 711bf215546Sopenharmony_ci unsigned int index, 712bf215546Sopenharmony_ci unsigned int mask) 713bf215546Sopenharmony_ci{ 714bf215546Sopenharmony_ci struct peephole_mul_cb_data * d = userdata; 715bf215546Sopenharmony_ci if (file == d->Writer->File && index == d->Writer->Index && 716bf215546Sopenharmony_ci (mask & d->Writer->WriteMask)) { 717bf215546Sopenharmony_ci d->Clobbered = 1; 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci} 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_cistatic int peephole_mul_omod( 722bf215546Sopenharmony_ci struct radeon_compiler * c, 723bf215546Sopenharmony_ci struct rc_instruction * inst_mul, 724bf215546Sopenharmony_ci struct rc_list * var_list) 725bf215546Sopenharmony_ci{ 726bf215546Sopenharmony_ci unsigned int chan = 0, swz, i; 727bf215546Sopenharmony_ci int const_index = -1; 728bf215546Sopenharmony_ci int temp_index = -1; 729bf215546Sopenharmony_ci float const_value; 730bf215546Sopenharmony_ci rc_omod_op omod_op = RC_OMOD_DISABLE; 731bf215546Sopenharmony_ci struct rc_list * writer_list; 732bf215546Sopenharmony_ci struct rc_variable * var; 733bf215546Sopenharmony_ci struct peephole_mul_cb_data cb_data; 734bf215546Sopenharmony_ci unsigned writemask_sum; 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci for (i = 0; i < 2; i++) { 737bf215546Sopenharmony_ci unsigned int j; 738bf215546Sopenharmony_ci if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT 739bf215546Sopenharmony_ci && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { 740bf215546Sopenharmony_ci return 0; 741bf215546Sopenharmony_ci } 742bf215546Sopenharmony_ci if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { 743bf215546Sopenharmony_ci if (temp_index != -1) { 744bf215546Sopenharmony_ci /* The instruction has two temp sources */ 745bf215546Sopenharmony_ci return 0; 746bf215546Sopenharmony_ci } else { 747bf215546Sopenharmony_ci temp_index = i; 748bf215546Sopenharmony_ci continue; 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci } 751bf215546Sopenharmony_ci /* If we get this far Src[i] must be a constant src */ 752bf215546Sopenharmony_ci if (inst_mul->U.I.SrcReg[i].Negate) { 753bf215546Sopenharmony_ci return 0; 754bf215546Sopenharmony_ci } 755bf215546Sopenharmony_ci /* The constant src needs to read from the same swizzle */ 756bf215546Sopenharmony_ci swz = RC_SWIZZLE_UNUSED; 757bf215546Sopenharmony_ci chan = 0; 758bf215546Sopenharmony_ci for (j = 0; j < 4; j++) { 759bf215546Sopenharmony_ci unsigned int j_swz = 760bf215546Sopenharmony_ci GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); 761bf215546Sopenharmony_ci if (j_swz == RC_SWIZZLE_UNUSED) { 762bf215546Sopenharmony_ci continue; 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci if (swz == RC_SWIZZLE_UNUSED) { 765bf215546Sopenharmony_ci swz = j_swz; 766bf215546Sopenharmony_ci chan = j; 767bf215546Sopenharmony_ci } else if (j_swz != swz) { 768bf215546Sopenharmony_ci return 0; 769bf215546Sopenharmony_ci } 770bf215546Sopenharmony_ci } 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci if (const_index != -1) { 773bf215546Sopenharmony_ci /* The instruction has two constant sources */ 774bf215546Sopenharmony_ci return 0; 775bf215546Sopenharmony_ci } else { 776bf215546Sopenharmony_ci const_index = i; 777bf215546Sopenharmony_ci } 778bf215546Sopenharmony_ci } 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, 781bf215546Sopenharmony_ci inst_mul->U.I.SrcReg[const_index].Index)) { 782bf215546Sopenharmony_ci return 0; 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci const_value = rc_get_constant_value(c, 785bf215546Sopenharmony_ci inst_mul->U.I.SrcReg[const_index].Index, 786bf215546Sopenharmony_ci inst_mul->U.I.SrcReg[const_index].Swizzle, 787bf215546Sopenharmony_ci inst_mul->U.I.SrcReg[const_index].Negate, 788bf215546Sopenharmony_ci chan); 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_ci if (const_value == 2.0f) { 791bf215546Sopenharmony_ci omod_op = RC_OMOD_MUL_2; 792bf215546Sopenharmony_ci } else if (const_value == 4.0f) { 793bf215546Sopenharmony_ci omod_op = RC_OMOD_MUL_4; 794bf215546Sopenharmony_ci } else if (const_value == 8.0f) { 795bf215546Sopenharmony_ci omod_op = RC_OMOD_MUL_8; 796bf215546Sopenharmony_ci } else if (const_value == (1.0f / 2.0f)) { 797bf215546Sopenharmony_ci omod_op = RC_OMOD_DIV_2; 798bf215546Sopenharmony_ci } else if (const_value == (1.0f / 4.0f)) { 799bf215546Sopenharmony_ci omod_op = RC_OMOD_DIV_4; 800bf215546Sopenharmony_ci } else if (const_value == (1.0f / 8.0f)) { 801bf215546Sopenharmony_ci omod_op = RC_OMOD_DIV_8; 802bf215546Sopenharmony_ci } else { 803bf215546Sopenharmony_ci return 0; 804bf215546Sopenharmony_ci } 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci writer_list = rc_variable_list_get_writers_one_reader(var_list, 807bf215546Sopenharmony_ci RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci if (!writer_list) { 810bf215546Sopenharmony_ci return 0; 811bf215546Sopenharmony_ci } 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci cb_data.Clobbered = 0; 814bf215546Sopenharmony_ci cb_data.Writer = &inst_mul->U.I.DstReg; 815bf215546Sopenharmony_ci for (var = writer_list->Item; var; var = var->Friend) { 816bf215546Sopenharmony_ci struct rc_instruction * inst; 817bf215546Sopenharmony_ci const struct rc_opcode_info * info = rc_get_opcode_info( 818bf215546Sopenharmony_ci var->Inst->U.I.Opcode); 819bf215546Sopenharmony_ci if (info->HasTexture) { 820bf215546Sopenharmony_ci return 0; 821bf215546Sopenharmony_ci } 822bf215546Sopenharmony_ci if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { 823bf215546Sopenharmony_ci return 0; 824bf215546Sopenharmony_ci } 825bf215546Sopenharmony_ci for (inst = inst_mul->Prev; inst != var->Inst; 826bf215546Sopenharmony_ci inst = inst->Prev) { 827bf215546Sopenharmony_ci rc_for_all_reads_mask(inst, omod_filter_reader_cb, 828bf215546Sopenharmony_ci &cb_data); 829bf215546Sopenharmony_ci rc_for_all_writes_mask(inst, omod_filter_writer_cb, 830bf215546Sopenharmony_ci &cb_data); 831bf215546Sopenharmony_ci if (cb_data.Clobbered) { 832bf215546Sopenharmony_ci break; 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci } 835bf215546Sopenharmony_ci } 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci if (cb_data.Clobbered) { 838bf215546Sopenharmony_ci return 0; 839bf215546Sopenharmony_ci } 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci writemask_sum = rc_variable_writemask_sum(writer_list->Item); 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci /* rc_normal_rewrite_writemask can't expand a previous writemask to store 844bf215546Sopenharmony_ci * more channels replicated. 845bf215546Sopenharmony_ci */ 846bf215546Sopenharmony_ci if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask)) 847bf215546Sopenharmony_ci return 0; 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci /* Rewrite the instructions */ 850bf215546Sopenharmony_ci for (var = writer_list->Item; var; var = var->Friend) { 851bf215546Sopenharmony_ci struct rc_variable * writer = var; 852bf215546Sopenharmony_ci unsigned conversion_swizzle = rc_make_conversion_swizzle( 853bf215546Sopenharmony_ci writemask_sum, 854bf215546Sopenharmony_ci inst_mul->U.I.DstReg.WriteMask); 855bf215546Sopenharmony_ci writer->Inst->U.I.Omod = omod_op; 856bf215546Sopenharmony_ci writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; 857bf215546Sopenharmony_ci writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; 858bf215546Sopenharmony_ci rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); 859bf215546Sopenharmony_ci writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; 860bf215546Sopenharmony_ci } 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci rc_remove_instruction(inst_mul); 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci return 1; 865bf215546Sopenharmony_ci} 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci/** 868bf215546Sopenharmony_ci * @return 869bf215546Sopenharmony_ci * 0 if inst is still part of the program. 870bf215546Sopenharmony_ci * 1 if inst is no longer part of the program. 871bf215546Sopenharmony_ci */ 872bf215546Sopenharmony_cistatic int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 873bf215546Sopenharmony_ci{ 874bf215546Sopenharmony_ci switch(inst->U.I.Opcode){ 875bf215546Sopenharmony_ci case RC_OPCODE_ADD: 876bf215546Sopenharmony_ci if (c->has_presub) { 877bf215546Sopenharmony_ci if(peephole_add_presub_inv(c, inst)) 878bf215546Sopenharmony_ci return 1; 879bf215546Sopenharmony_ci if(peephole_add_presub_add(c, inst)) 880bf215546Sopenharmony_ci return 1; 881bf215546Sopenharmony_ci } 882bf215546Sopenharmony_ci break; 883bf215546Sopenharmony_ci default: 884bf215546Sopenharmony_ci break; 885bf215546Sopenharmony_ci } 886bf215546Sopenharmony_ci return 0; 887bf215546Sopenharmony_ci} 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_cistatic unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) { 890bf215546Sopenharmony_ci unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); 891bf215546Sopenharmony_ci for (unsigned int chan = 0; chan < 4; chan++) { 892bf215546Sopenharmony_ci unsigned int swz = GET_SWZ(swz1, chan); 893bf215546Sopenharmony_ci if (swz != RC_SWIZZLE_UNUSED) { 894bf215546Sopenharmony_ci SET_SWZ(new_swz, chan, swz); 895bf215546Sopenharmony_ci continue; 896bf215546Sopenharmony_ci } 897bf215546Sopenharmony_ci swz = GET_SWZ(swz2, chan); 898bf215546Sopenharmony_ci SET_SWZ(new_swz, chan, swz); 899bf215546Sopenharmony_ci } 900bf215546Sopenharmony_ci return new_swz; 901bf215546Sopenharmony_ci} 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_cistatic int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst) 904bf215546Sopenharmony_ci{ 905bf215546Sopenharmony_ci unsigned int orig_dst_reg = inst->U.I.DstReg.Index; 906bf215546Sopenharmony_ci unsigned int orig_dst_file = inst->U.I.DstReg.File; 907bf215546Sopenharmony_ci unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask; 908bf215546Sopenharmony_ci unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index; 909bf215546Sopenharmony_ci unsigned int orig_src_file = inst->U.I.SrcReg[0].File; 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci struct rc_instruction * cur = inst; 912bf215546Sopenharmony_ci while (cur!= &c->Program.Instructions) { 913bf215546Sopenharmony_ci cur = cur->Next; 914bf215546Sopenharmony_ci const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode); 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci /* Keep it simple for now and stop when encountering any 917bf215546Sopenharmony_ci * control flow. 918bf215546Sopenharmony_ci */ 919bf215546Sopenharmony_ci if (opcode->IsFlowControl) 920bf215546Sopenharmony_ci return 0; 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci /* Stop when the original destination is overwritten */ 923bf215546Sopenharmony_ci if (orig_dst_reg == cur->U.I.DstReg.Index && 924bf215546Sopenharmony_ci orig_dst_file == cur->U.I.DstReg.File && 925bf215546Sopenharmony_ci (orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0) 926bf215546Sopenharmony_ci return 0; 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci /* Stop the search when the original instruction destination 929bf215546Sopenharmony_ci * is used as a source for anything. 930bf215546Sopenharmony_ci */ 931bf215546Sopenharmony_ci for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { 932bf215546Sopenharmony_ci if (cur->U.I.SrcReg[i].File == orig_dst_file && 933bf215546Sopenharmony_ci cur->U.I.SrcReg[i].Index == orig_dst_reg) 934bf215546Sopenharmony_ci return 0; 935bf215546Sopenharmony_ci } 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci if (cur->U.I.Opcode == RC_OPCODE_MOV && 938bf215546Sopenharmony_ci cur->U.I.DstReg.File == orig_dst_file && 939bf215546Sopenharmony_ci cur->U.I.DstReg.Index == orig_dst_reg && 940bf215546Sopenharmony_ci (cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) { 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci /* We can merge the movs if one of them is from inline constant */ 943bf215546Sopenharmony_ci if (cur->U.I.SrcReg[0].File == RC_FILE_NONE || 944bf215546Sopenharmony_ci orig_src_file == RC_FILE_NONE) { 945bf215546Sopenharmony_ci cur->U.I.DstReg.WriteMask |= orig_dst_wmask; 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) { 948bf215546Sopenharmony_ci cur->U.I.SrcReg[0].File = orig_src_file; 949bf215546Sopenharmony_ci cur->U.I.SrcReg[0].Index = orig_src_reg; 950bf215546Sopenharmony_ci cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs; 951bf215546Sopenharmony_ci cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr; 952bf215546Sopenharmony_ci } 953bf215546Sopenharmony_ci cur->U.I.SrcReg[0].Swizzle = 954bf215546Sopenharmony_ci merge_swizzles(cur->U.I.SrcReg[0].Swizzle, 955bf215546Sopenharmony_ci inst->U.I.SrcReg[0].Swizzle); 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate; 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci /* finally delete the original mov */ 960bf215546Sopenharmony_ci rc_remove_instruction(inst); 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_ci return 1; 963bf215546Sopenharmony_ci } 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci return 0; 967bf215546Sopenharmony_ci} 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_civoid rc_optimize(struct radeon_compiler * c, void *user) 970bf215546Sopenharmony_ci{ 971bf215546Sopenharmony_ci struct rc_instruction * inst = c->Program.Instructions.Next; 972bf215546Sopenharmony_ci while(inst != &c->Program.Instructions) { 973bf215546Sopenharmony_ci struct rc_instruction * cur = inst; 974bf215546Sopenharmony_ci inst = inst->Next; 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci constant_folding(c, cur); 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_ci if(peephole(c, cur)) 979bf215546Sopenharmony_ci continue; 980bf215546Sopenharmony_ci 981bf215546Sopenharmony_ci if (cur->U.I.Opcode == RC_OPCODE_MOV) { 982bf215546Sopenharmony_ci if (c->is_r500) { 983bf215546Sopenharmony_ci if (merge_movs(c, cur)) 984bf215546Sopenharmony_ci continue; 985bf215546Sopenharmony_ci } 986bf215546Sopenharmony_ci copy_propagate(c, cur); 987bf215546Sopenharmony_ci /* cur may no longer be part of the program */ 988bf215546Sopenharmony_ci } 989bf215546Sopenharmony_ci } 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci if (!c->has_omod) { 992bf215546Sopenharmony_ci return; 993bf215546Sopenharmony_ci } 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci inst = c->Program.Instructions.Next; 996bf215546Sopenharmony_ci struct rc_list * var_list = NULL; 997bf215546Sopenharmony_ci while(inst != &c->Program.Instructions) { 998bf215546Sopenharmony_ci struct rc_instruction * cur = inst; 999bf215546Sopenharmony_ci inst = inst->Next; 1000bf215546Sopenharmony_ci if (cur->U.I.Opcode == RC_OPCODE_MUL) { 1001bf215546Sopenharmony_ci if (!var_list) 1002bf215546Sopenharmony_ci var_list = rc_get_variables(c); 1003bf215546Sopenharmony_ci if (peephole_mul_omod(c, cur, var_list)) 1004bf215546Sopenharmony_ci var_list = NULL; 1005bf215546Sopenharmony_ci } 1006bf215546Sopenharmony_ci } 1007bf215546Sopenharmony_ci} 1008