1/* 2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 22 23#include "radeon_compiler.h" 24 25#include <stdarg.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include "util/u_debug.h" 31#include "pipe/p_state.h" 32#include "radeon_dataflow.h" 33#include "radeon_program.h" 34#include "radeon_program_pair.h" 35#include "radeon_regalloc.h" 36#include "radeon_compiler_util.h" 37 38 39void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs) 40{ 41 memset(c, 0, sizeof(*c)); 42 43 memory_pool_init(&c->Pool); 44 c->Program.Instructions.Prev = &c->Program.Instructions; 45 c->Program.Instructions.Next = &c->Program.Instructions; 46 c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; 47 c->regalloc_state = rs; 48} 49 50void rc_destroy(struct radeon_compiler * c) 51{ 52 rc_constants_destroy(&c->Program.Constants); 53 memory_pool_destroy(&c->Pool); 54 free(c->ErrorMsg); 55} 56 57void rc_debug(struct radeon_compiler * c, const char * fmt, ...) 58{ 59 va_list ap; 60 61 if (!(c->Debug & RC_DBG_LOG)) 62 return; 63 64 va_start(ap, fmt); 65 vfprintf(stderr, fmt, ap); 66 va_end(ap); 67} 68 69void rc_error(struct radeon_compiler * c, const char * fmt, ...) 70{ 71 va_list ap; 72 73 c->Error = 1; 74 75 if (!c->ErrorMsg) { 76 /* Only remember the first error */ 77 char buf[1024]; 78 int written; 79 80 va_start(ap, fmt); 81 written = vsnprintf(buf, sizeof(buf), fmt, ap); 82 va_end(ap); 83 84 if (written < sizeof(buf)) { 85 c->ErrorMsg = strdup(buf); 86 } else { 87 c->ErrorMsg = malloc(written + 1); 88 89 va_start(ap, fmt); 90 vsnprintf(c->ErrorMsg, written + 1, fmt, ap); 91 va_end(ap); 92 } 93 } 94 95 if (c->Debug & RC_DBG_LOG) { 96 fprintf(stderr, "r300compiler error: "); 97 98 va_start(ap, fmt); 99 vfprintf(stderr, fmt, ap); 100 va_end(ap); 101 } 102} 103 104int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) 105{ 106 rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); 107 return 1; 108} 109 110/** 111 * Recompute c->Program.InputsRead and c->Program.OutputsWritten 112 * based on which inputs and outputs are actually referenced 113 * in program instructions. 114 */ 115void rc_calculate_inputs_outputs(struct radeon_compiler * c) 116{ 117 struct rc_instruction *inst; 118 119 c->Program.InputsRead = 0; 120 c->Program.OutputsWritten = 0; 121 122 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) 123 { 124 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 125 int i; 126 127 for (i = 0; i < opcode->NumSrcRegs; ++i) { 128 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) 129 c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index; 130 } 131 132 if (opcode->HasDstReg) { 133 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) 134 c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index; 135 } 136 } 137} 138 139/** 140 * Rewrite the program such that a given output is duplicated. 141 */ 142void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) 143{ 144 unsigned tempreg = rc_find_free_temporary(c); 145 struct rc_instruction * inst; 146 struct rc_instruction * insert_pos = c->Program.Instructions.Prev; 147 struct rc_instruction * last_write_inst = NULL; 148 unsigned branch_depth = 0; 149 unsigned loop_depth = 0; 150 bool emit_after_control_flow = false; 151 unsigned num_writes = 0; 152 153 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { 154 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 155 156 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) 157 loop_depth++; 158 if (inst->U.I.Opcode == RC_OPCODE_IF) 159 branch_depth++; 160 if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) || 161 (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--)) 162 if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) { 163 insert_pos = inst; 164 emit_after_control_flow = false; 165 } 166 167 if (opcode->HasDstReg) { 168 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { 169 num_writes++; 170 inst->U.I.DstReg.File = RC_FILE_TEMPORARY; 171 inst->U.I.DstReg.Index = tempreg; 172 insert_pos = inst; 173 last_write_inst = inst; 174 if (loop_depth != 0 && branch_depth != 0) 175 emit_after_control_flow = true; 176 } 177 } 178 } 179 180 /* If there is only a single write, just duplicate the whole instruction instead. 181 * We can do this even when the single write was is a control flow. 182 */ 183 if (num_writes == 1) { 184 last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT; 185 last_write_inst->U.I.DstReg.Index = output; 186 187 inst = rc_insert_new_instruction(c, last_write_inst); 188 struct rc_instruction * prev = inst->Prev; 189 struct rc_instruction * next = inst->Next; 190 memcpy(inst, last_write_inst, sizeof(struct rc_instruction)); 191 inst->Prev = prev; 192 inst->Next = next; 193 inst->U.I.DstReg.Index = dup_output; 194 } else { 195 inst = rc_insert_new_instruction(c, insert_pos); 196 inst->U.I.Opcode = RC_OPCODE_MOV; 197 inst->U.I.DstReg.File = RC_FILE_OUTPUT; 198 inst->U.I.DstReg.Index = output; 199 200 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 201 inst->U.I.SrcReg[0].Index = tempreg; 202 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; 203 204 inst = rc_insert_new_instruction(c, inst); 205 inst->U.I.Opcode = RC_OPCODE_MOV; 206 inst->U.I.DstReg.File = RC_FILE_OUTPUT; 207 inst->U.I.DstReg.Index = dup_output; 208 209 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 210 inst->U.I.SrcReg[0].Index = tempreg; 211 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; 212 } 213 214 c->Program.OutputsWritten |= 1U << dup_output; 215} 216 217 218/** 219 * Introduce standard code fragment to deal with fragment.position. 220 */ 221void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, 222 int full_vtransform) 223{ 224 unsigned tempregi = rc_find_free_temporary(c); 225 struct rc_instruction * inst_rcp; 226 struct rc_instruction * inst_mul; 227 struct rc_instruction * inst_mad; 228 struct rc_instruction * inst; 229 230 c->Program.InputsRead &= ~(1U << wpos); 231 c->Program.InputsRead |= 1U << new_input; 232 233 /* perspective divide */ 234 inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); 235 inst_rcp->U.I.Opcode = RC_OPCODE_RCP; 236 237 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; 238 inst_rcp->U.I.DstReg.Index = tempregi; 239 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; 240 241 inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; 242 inst_rcp->U.I.SrcReg[0].Index = new_input; 243 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; 244 245 inst_mul = rc_insert_new_instruction(c, inst_rcp); 246 inst_mul->U.I.Opcode = RC_OPCODE_MUL; 247 248 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; 249 inst_mul->U.I.DstReg.Index = tempregi; 250 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; 251 252 inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; 253 inst_mul->U.I.SrcReg[0].Index = new_input; 254 255 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; 256 inst_mul->U.I.SrcReg[1].Index = tempregi; 257 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; 258 259 /* viewport transformation */ 260 inst_mad = rc_insert_new_instruction(c, inst_mul); 261 inst_mad->U.I.Opcode = RC_OPCODE_MAD; 262 263 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; 264 inst_mad->U.I.DstReg.Index = tempregi; 265 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; 266 267 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 268 inst_mad->U.I.SrcReg[0].Index = tempregi; 269 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; 270 271 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; 272 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; 273 274 inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; 275 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; 276 277 if (full_vtransform) { 278 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); 279 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); 280 } else { 281 inst_mad->U.I.SrcReg[1].Index = 282 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); 283 } 284 285 for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { 286 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 287 unsigned i; 288 289 for(i = 0; i < opcode->NumSrcRegs; i++) { 290 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && 291 inst->U.I.SrcReg[i].Index == wpos) { 292 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; 293 inst->U.I.SrcReg[i].Index = tempregi; 294 } 295 } 296 } 297} 298 299 300/** 301 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. 302 * Gallium and OpenGL define it the other way around. 303 * 304 * So let's just negate FACE at the beginning of the shader and rewrite the rest 305 * of the shader to read from the newly allocated temporary. 306 */ 307void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) 308{ 309 unsigned tempregi = rc_find_free_temporary(c); 310 struct rc_instruction *inst_add; 311 struct rc_instruction *inst; 312 313 /* perspective divide */ 314 inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); 315 inst_add->U.I.Opcode = RC_OPCODE_ADD; 316 317 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; 318 inst_add->U.I.DstReg.Index = tempregi; 319 inst_add->U.I.DstReg.WriteMask = RC_MASK_X; 320 321 inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; 322 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; 323 324 inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; 325 inst_add->U.I.SrcReg[1].Index = face; 326 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; 327 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; 328 329 for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { 330 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 331 unsigned i; 332 333 for(i = 0; i < opcode->NumSrcRegs; i++) { 334 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && 335 inst->U.I.SrcReg[i].Index == face) { 336 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; 337 inst->U.I.SrcReg[i].Index = tempregi; 338 } 339 } 340 } 341} 342 343static void reg_count_callback(void * userdata, struct rc_instruction * inst, 344 rc_register_file file, unsigned int index, unsigned int mask) 345{ 346 struct rc_program_stats *s = userdata; 347 if (file == RC_FILE_TEMPORARY) 348 (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0; 349 if (file == RC_FILE_INLINE) 350 s->num_inline_literals++; 351 if (file == RC_FILE_CONSTANT) 352 s->num_consts = MAX2(s->num_consts, index + 1); 353} 354 355void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) 356{ 357 struct rc_instruction * tmp; 358 memset(s, 0, sizeof(*s)); 359 360 for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; 361 tmp = tmp->Next){ 362 const struct rc_opcode_info * info; 363 rc_for_all_reads_mask(tmp, reg_count_callback, s); 364 if (tmp->Type == RC_INSTRUCTION_NORMAL) { 365 info = rc_get_opcode_info(tmp->U.I.Opcode); 366 if (info->Opcode == RC_OPCODE_BEGIN_TEX) 367 continue; 368 if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) 369 s->num_presub_ops++; 370 } else { 371 if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) 372 s->num_presub_ops++; 373 if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) 374 s->num_presub_ops++; 375 /* Assuming alpha will never be a flow control or 376 * a tex instruction. */ 377 if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) 378 s->num_alpha_insts++; 379 if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) 380 s->num_rgb_insts++; 381 if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && 382 tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) { 383 s->num_omod_ops++; 384 } 385 if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && 386 tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) { 387 s->num_omod_ops++; 388 } 389 info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); 390 } 391 if (info->IsFlowControl) { 392 s->num_fc_insts++; 393 if (info->Opcode == RC_OPCODE_BGNLOOP) 394 s->num_loops++; 395 } 396 /* VS flow control was already translated to the predicate instructions */ 397 if (c->type == RC_VERTEX_PROGRAM) 398 if (strstr(info->Name, "PRED") != NULL) 399 s->num_pred_insts++; 400 401 if (info->HasTexture) 402 s->num_tex_insts++; 403 s->num_insts++; 404 } 405 /* Increment here because the reg_count_callback store the max 406 * temporary reg index in s->nun_temp_regs. */ 407 s->num_temp_regs++; 408} 409 410static void print_stats(struct radeon_compiler * c) 411{ 412 struct rc_program_stats s; 413 414 rc_get_stats(c, &s); 415 416 /* Note that we print some dummy values for instruction categories that 417 * only the FS has, becasue shader-db's report.py wants all shaders to 418 * have the same set. 419 */ 420 util_debug_message(c->debug, SHADER_INFO, "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, %u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits", 421 c->type == RC_VERTEX_PROGRAM ? "VS" : "FS", 422 s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts, 423 s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops, 424 s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals); 425} 426 427static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { 428 "Vertex Program", 429 "Fragment Program" 430}; 431 432bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) 433{ 434 for (unsigned i = 0; list[i].name; i++) { 435 if (list[i].predicate) { 436 list[i].run(c, list[i].user); 437 438 if (c->Error) 439 return false; 440 441 if ((c->Debug & RC_DBG_LOG) && list[i].dump) { 442 fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); 443 rc_print_program(&c->Program); 444 } 445 } 446 } 447 return true; 448} 449 450/* Executes a list of compiler passes given in the parameter 'list'. */ 451void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) 452{ 453 if (c->Debug & RC_DBG_LOG) { 454 fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); 455 rc_print_program(&c->Program); 456 } 457 458 if(rc_run_compiler_passes(c, list)) { 459 print_stats(c); 460 } 461} 462 463void rc_validate_final_shader(struct radeon_compiler *c, void *user) 464{ 465 /* Check the number of constants. */ 466 if (c->Program.Constants.Count > c->max_constants) { 467 rc_error(c, "Too many constants. Max: %i, Got: %i\n", 468 c->max_constants, c->Program.Constants.Count); 469 } 470} 471