1/* 2 * Linearize - walk the statement tree (but _not_ the expressions) 3 * to generate a linear version of it and the basic blocks. 4 * 5 * NOTE! We're not interested in the actual sub-expressions yet, 6 * even though they can generate conditional branches and 7 * subroutine calls. That's all "local" behaviour. 8 * 9 * Copyright (C) 2004 Linus Torvalds 10 * Copyright (C) 2004 Christopher Li 11 */ 12 13#include <string.h> 14#include <stdarg.h> 15#include <stdlib.h> 16#include <stdio.h> 17#include <assert.h> 18 19#include "parse.h" 20#include "expression.h" 21#include "linearize.h" 22#include "optimize.h" 23#include "flow.h" 24#include "target.h" 25 26static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt); 27static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr); 28 29static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to, struct symbol *from, int op, pseudo_t src); 30static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right); 31static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym); 32 33static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to); 34 35struct pseudo void_pseudo = {}; 36 37static struct position current_pos; 38 39ALLOCATOR(pseudo_user, "pseudo_user"); 40 41static struct instruction *alloc_instruction(int opcode, int size) 42{ 43 struct instruction * insn = __alloc_instruction(0); 44 insn->opcode = opcode; 45 insn->size = size; 46 insn->pos = current_pos; 47 return insn; 48} 49 50static inline int type_size(struct symbol *type) 51{ 52 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0; 53} 54 55static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type) 56{ 57 struct instruction *insn = alloc_instruction(opcode, type_size(type)); 58 insn->type = type; 59 return insn; 60} 61 62static struct entrypoint *alloc_entrypoint(void) 63{ 64 return __alloc_entrypoint(0); 65} 66 67static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos) 68{ 69 static int nr; 70 struct basic_block *bb = __alloc_basic_block(0); 71 bb->pos = pos; 72 bb->ep = ep; 73 bb->nr = nr++; 74 return bb; 75} 76 77static struct multijmp *alloc_multijmp(struct basic_block *target, long long begin, long long end) 78{ 79 struct multijmp *multijmp = __alloc_multijmp(0); 80 multijmp->target = target; 81 multijmp->begin = begin; 82 multijmp->end = end; 83 return multijmp; 84} 85 86const char *show_label(struct basic_block *bb) 87{ 88 static int n; 89 static char buffer[4][16]; 90 char *buf = buffer[3 & ++n]; 91 92 if (!bb) 93 return ".L???"; 94 snprintf(buf, 64, ".L%u", bb->nr); 95 return buf; 96} 97 98const char *show_pseudo(pseudo_t pseudo) 99{ 100 static int n; 101 static char buffer[4][64]; 102 char *buf; 103 int i; 104 105 if (!pseudo) 106 return "no pseudo"; 107 if (pseudo == VOID) 108 return "VOID"; 109 buf = buffer[3 & ++n]; 110 switch(pseudo->type) { 111 case PSEUDO_SYM: { 112 struct symbol *sym = pseudo->sym; 113 struct expression *expr; 114 115 if (!sym) { 116 snprintf(buf, 64, "<bad symbol>"); 117 break; 118 } 119 if (sym->bb_target) { 120 snprintf(buf, 64, "%s", show_label(sym->bb_target)); 121 break; 122 } 123 if (sym->ident) { 124 snprintf(buf, 64, "%s", show_ident(sym->ident)); 125 break; 126 } 127 expr = sym->initializer; 128 snprintf(buf, 64, "<anon symbol:%p>", verbose ? sym : NULL); 129 if (expr) { 130 switch (expr->type) { 131 case EXPR_VALUE: 132 snprintf(buf, 64, "<symbol value: %lld>", expr->value); 133 break; 134 case EXPR_STRING: 135 return show_string(expr->string); 136 default: 137 break; 138 } 139 } 140 break; 141 } 142 case PSEUDO_REG: 143 i = snprintf(buf, 64, "%%r%d", pseudo->nr); 144 if (pseudo->ident) 145 sprintf(buf+i, "(%s)", show_ident(pseudo->ident)); 146 break; 147 case PSEUDO_VAL: { 148 long long value = pseudo->value; 149 if (value > 1000 || value < -1000) 150 snprintf(buf, 64, "$%#llx", value); 151 else 152 snprintf(buf, 64, "$%lld", value); 153 break; 154 } 155 case PSEUDO_ARG: 156 snprintf(buf, 64, "%%arg%d", pseudo->nr); 157 break; 158 case PSEUDO_PHI: 159 i = snprintf(buf, 64, "%%phi%d", pseudo->nr); 160 if (pseudo->ident) 161 sprintf(buf+i, "(%s)", show_ident(pseudo->ident)); 162 break; 163 case PSEUDO_UNDEF: 164 return "UNDEF"; 165 default: 166 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type); 167 } 168 return buf; 169} 170 171static const char *opcodes[] = { 172 [OP_BADOP] = "bad_op", 173 174 /* Fn entrypoint */ 175 [OP_ENTRY] = "<entry-point>", 176 177 /* Terminator */ 178 [OP_RET] = "ret", 179 [OP_BR] = "br", 180 [OP_CBR] = "cbr", 181 [OP_SWITCH] = "switch", 182 [OP_UNREACH] = "unreachable", 183 [OP_COMPUTEDGOTO] = "jmp *", 184 185 /* Binary */ 186 [OP_ADD] = "add", 187 [OP_SUB] = "sub", 188 [OP_MUL] = "mul", 189 [OP_DIVU] = "divu", 190 [OP_DIVS] = "divs", 191 [OP_MODU] = "modu", 192 [OP_MODS] = "mods", 193 [OP_SHL] = "shl", 194 [OP_LSR] = "lsr", 195 [OP_ASR] = "asr", 196 197 /* Floating-point Binary */ 198 [OP_FADD] = "fadd", 199 [OP_FSUB] = "fsub", 200 [OP_FMUL] = "fmul", 201 [OP_FDIV] = "fdiv", 202 203 /* Logical */ 204 [OP_AND] = "and", 205 [OP_OR] = "or", 206 [OP_XOR] = "xor", 207 208 /* Binary comparison */ 209 [OP_SET_EQ] = "seteq", 210 [OP_SET_NE] = "setne", 211 [OP_SET_LE] = "setle", 212 [OP_SET_GE] = "setge", 213 [OP_SET_LT] = "setlt", 214 [OP_SET_GT] = "setgt", 215 [OP_SET_B] = "setb", 216 [OP_SET_A] = "seta", 217 [OP_SET_BE] = "setbe", 218 [OP_SET_AE] = "setae", 219 220 /* floating-point comparison */ 221 [OP_FCMP_ORD] = "fcmpord", 222 [OP_FCMP_OEQ] = "fcmpoeq", 223 [OP_FCMP_ONE] = "fcmpone", 224 [OP_FCMP_OLE] = "fcmpole", 225 [OP_FCMP_OGE] = "fcmpoge", 226 [OP_FCMP_OLT] = "fcmpolt", 227 [OP_FCMP_OGT] = "fcmpogt", 228 [OP_FCMP_UEQ] = "fcmpueq", 229 [OP_FCMP_UNE] = "fcmpune", 230 [OP_FCMP_ULE] = "fcmpule", 231 [OP_FCMP_UGE] = "fcmpuge", 232 [OP_FCMP_ULT] = "fcmpult", 233 [OP_FCMP_UGT] = "fcmpugt", 234 [OP_FCMP_UNO] = "fcmpuno", 235 236 /* Uni */ 237 [OP_NOT] = "not", 238 [OP_NEG] = "neg", 239 [OP_FNEG] = "fneg", 240 241 /* Special three-input */ 242 [OP_SEL] = "select", 243 [OP_FMADD] = "fmadd", 244 245 /* Memory */ 246 [OP_LOAD] = "load", 247 [OP_STORE] = "store", 248 [OP_LABEL] = "label", 249 [OP_SETVAL] = "set", 250 [OP_SETFVAL] = "setfval", 251 [OP_SYMADDR] = "symaddr", 252 253 /* Other */ 254 [OP_PHI] = "phi", 255 [OP_PHISOURCE] = "phisrc", 256 [OP_SEXT] = "sext", 257 [OP_ZEXT] = "zext", 258 [OP_TRUNC] = "trunc", 259 [OP_FCVTU] = "fcvtu", 260 [OP_FCVTS] = "fcvts", 261 [OP_UCVTF] = "ucvtf", 262 [OP_SCVTF] = "scvtf", 263 [OP_FCVTF] = "fcvtf", 264 [OP_UTPTR] = "utptr", 265 [OP_PTRTU] = "ptrtu", 266 [OP_PTRCAST] = "ptrcast", 267 [OP_INLINED_CALL] = "# call", 268 [OP_CALL] = "call", 269 [OP_SLICE] = "slice", 270 [OP_NOP] = "nop", 271 [OP_DEATHNOTE] = "dead", 272 [OP_ASM] = "asm", 273 274 /* Sparse tagging (line numbers, context, whatever) */ 275 [OP_CONTEXT] = "context", 276 [OP_RANGE] = "range-check", 277 278 [OP_COPY] = "copy", 279}; 280 281static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list) 282{ 283 struct asm_constraint *entry; 284 285 FOR_EACH_PTR(list, entry) { 286 buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint); 287 if (entry->pseudo) 288 buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo)); 289 if (entry->ident) 290 buf += sprintf(buf, " [%s]", show_ident(entry->ident)); 291 sep = ", "; 292 } END_FOR_EACH_PTR(entry); 293 return buf; 294} 295 296static char *show_asm(char *buf, struct instruction *insn) 297{ 298 struct asm_rules *rules = insn->asm_rules; 299 300 buf += sprintf(buf, "\"%s\"", insn->string); 301 buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs); 302 buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs); 303 buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers); 304 return buf; 305} 306 307const char *show_instruction(struct instruction *insn) 308{ 309 int opcode = insn->opcode; 310 static char buffer[4096]; 311 char *buf; 312 313 buf = buffer; 314 if (!insn->bb) 315 buf += sprintf(buf, "# "); 316 317 if (opcode < ARRAY_SIZE(opcodes)) { 318 const char *op = opcodes[opcode]; 319 if (!op) 320 buf += sprintf(buf, "opcode:%d", opcode); 321 else 322 buf += sprintf(buf, "%s", op); 323 if (insn->size) 324 buf += sprintf(buf, ".%d", insn->size); 325 memset(buf, ' ', 20); 326 buf++; 327 } 328 329 if (buf < buffer + 12) 330 buf = buffer + 12; 331 switch (opcode) { 332 case OP_RET: 333 if (insn->src && insn->src != VOID) 334 buf += sprintf(buf, "%s", show_pseudo(insn->src)); 335 break; 336 337 case OP_CBR: 338 buf += sprintf(buf, "%s, %s, %s", show_pseudo(insn->cond), show_label(insn->bb_true), show_label(insn->bb_false)); 339 break; 340 341 case OP_BR: 342 buf += sprintf(buf, "%s", show_label(insn->bb_true)); 343 break; 344 345 case OP_LABEL: 346 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target)); 347 buf += sprintf(buf, "%s", show_label(insn->bb_true)); 348 break; 349 350 case OP_SETVAL: { 351 struct expression *expr = insn->val; 352 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target)); 353 354 if (!expr) { 355 buf += sprintf(buf, "%s", "<none>"); 356 break; 357 } 358 359 switch (expr->type) { 360 case EXPR_VALUE: 361 buf += sprintf(buf, "%lld", expr->value); 362 break; 363 case EXPR_FVALUE: 364 buf += sprintf(buf, "%Le", expr->fvalue); 365 break; 366 case EXPR_STRING: 367 buf += sprintf(buf, "%.40s", show_string(expr->string)); 368 break; 369 case EXPR_SYMBOL: 370 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident)); 371 break; 372 case EXPR_LABEL: 373 buf += sprintf(buf, "%s", show_label(expr->symbol->bb_target)); 374 break; 375 default: 376 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type); 377 } 378 break; 379 } 380 case OP_SETFVAL: 381 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target)); 382 buf += sprintf(buf, "%Le", insn->fvalue); 383 break; 384 385 case OP_SWITCH: { 386 struct multijmp *jmp; 387 buf += sprintf(buf, "%s", show_pseudo(insn->cond)); 388 FOR_EACH_PTR(insn->multijmp_list, jmp) { 389 if (jmp->begin == jmp->end) 390 buf += sprintf(buf, ", %lld -> %s", jmp->begin, show_label(jmp->target)); 391 else if (jmp->begin < jmp->end) 392 buf += sprintf(buf, ", %lld ... %lld -> %s", jmp->begin, jmp->end, show_label(jmp->target)); 393 else 394 buf += sprintf(buf, ", default -> %s", show_label(jmp->target)); 395 } END_FOR_EACH_PTR(jmp); 396 break; 397 } 398 case OP_COMPUTEDGOTO: { 399 struct multijmp *jmp; 400 buf += sprintf(buf, "%s", show_pseudo(insn->src)); 401 FOR_EACH_PTR(insn->multijmp_list, jmp) { 402 buf += sprintf(buf, ", %s", show_label(jmp->target)); 403 } END_FOR_EACH_PTR(jmp); 404 break; 405 } 406 case OP_UNREACH: 407 break; 408 409 case OP_PHISOURCE: { 410 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src)); 411 break; 412 } 413 414 case OP_PHI: { 415 pseudo_t phi; 416 const char *s = " <-"; 417 buf += sprintf(buf, "%s", show_pseudo(insn->target)); 418 FOR_EACH_PTR(insn->phi_list, phi) { 419 if (phi == VOID && !verbose) 420 continue; 421 buf += sprintf(buf, "%s %s", s, show_pseudo(phi)); 422 s = ","; 423 } END_FOR_EACH_PTR(phi); 424 break; 425 } 426 case OP_LOAD: 427 buf += sprintf(buf, "%s <- %lld[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src)); 428 break; 429 case OP_STORE: 430 buf += sprintf(buf, "%s -> %lld[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src)); 431 break; 432 case OP_INLINED_CALL: 433 case OP_CALL: { 434 struct pseudo *arg; 435 if (insn->target && insn->target != VOID) 436 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target)); 437 buf += sprintf(buf, "%s", show_pseudo(insn->func)); 438 FOR_EACH_PTR(insn->arguments, arg) { 439 buf += sprintf(buf, ", %s", show_pseudo(arg)); 440 } END_FOR_EACH_PTR(arg); 441 break; 442 } 443 case OP_SEXT: case OP_ZEXT: 444 case OP_TRUNC: 445 case OP_FCVTU: case OP_FCVTS: 446 case OP_UCVTF: case OP_SCVTF: 447 case OP_FCVTF: 448 case OP_UTPTR: 449 case OP_PTRTU: 450 case OP_PTRCAST: 451 buf += sprintf(buf, "%s <- (%d) %s", 452 show_pseudo(insn->target), 453 type_size(insn->orig_type), 454 show_pseudo(insn->src)); 455 break; 456 case OP_BINARY ... OP_BINARY_END: 457 case OP_FPCMP ... OP_FPCMP_END: 458 case OP_BINCMP ... OP_BINCMP_END: 459 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2)); 460 break; 461 462 case OP_SEL: 463 case OP_FMADD: 464 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target), 465 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3)); 466 break; 467 468 case OP_SLICE: 469 buf += sprintf(buf, "%s <- (%d) %s, %d", show_pseudo(insn->target), type_size(insn->orig_type), show_pseudo(insn->src), insn->from); 470 break; 471 472 case OP_NOT: case OP_NEG: 473 case OP_FNEG: 474 case OP_SYMADDR: 475 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1)); 476 break; 477 478 case OP_CONTEXT: 479 buf += sprintf(buf, "%s%d", insn->check ? "check: " : "", insn->increment); 480 break; 481 case OP_RANGE: 482 buf += sprintf(buf, "%s between %s..%s", show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3)); 483 break; 484 case OP_NOP: 485 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1)); 486 break; 487 case OP_DEATHNOTE: 488 buf += sprintf(buf, "%s", show_pseudo(insn->target)); 489 break; 490 case OP_ASM: 491 buf = show_asm(buf, insn); 492 break; 493 case OP_COPY: 494 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src)); 495 break; 496 default: 497 break; 498 } 499 500 if (buf >= buffer + sizeof(buffer)) 501 die("instruction buffer overflowed %td\n", buf - buffer); 502 do { --buf; } while (*buf == ' '); 503 *++buf = 0; 504 return buffer; 505} 506 507void show_bb(struct basic_block *bb) 508{ 509 struct instruction *insn; 510 511 printf("%s:\n", show_label(bb)); 512 if (verbose) { 513 pseudo_t needs, defines; 514 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line); 515 516 FOR_EACH_PTR(bb->needs, needs) { 517 struct instruction *def = needs->def; 518 if (def->opcode != OP_PHI) { 519 printf(" **uses %s (from %s)**\n", show_pseudo(needs), show_label(def->bb)); 520 } else { 521 pseudo_t phi; 522 const char *sep = " "; 523 printf(" **uses %s (from", show_pseudo(needs)); 524 FOR_EACH_PTR(def->phi_list, phi) { 525 if (phi == VOID) 526 continue; 527 printf("%s(%s:%s)", sep, show_pseudo(phi), show_label(phi->def->bb)); 528 sep = ", "; 529 } END_FOR_EACH_PTR(phi); 530 printf(")**\n"); 531 } 532 } END_FOR_EACH_PTR(needs); 533 534 FOR_EACH_PTR(bb->defines, defines) { 535 printf(" **defines %s **\n", show_pseudo(defines)); 536 } END_FOR_EACH_PTR(defines); 537 538 if (bb->parents) { 539 struct basic_block *from; 540 FOR_EACH_PTR(bb->parents, from) { 541 printf(" **from %s (%s:%d:%d)**\n", show_label(from), 542 stream_name(from->pos.stream), from->pos.line, from->pos.pos); 543 } END_FOR_EACH_PTR(from); 544 } 545 546 if (bb->children) { 547 struct basic_block *to; 548 FOR_EACH_PTR(bb->children, to) { 549 printf(" **to %s (%s:%d:%d)**\n", show_label(to), 550 stream_name(to->pos.stream), to->pos.line, to->pos.pos); 551 } END_FOR_EACH_PTR(to); 552 } 553 } 554 555 FOR_EACH_PTR(bb->insns, insn) { 556 if (!insn->bb && verbose < 2) 557 continue; 558 printf("\t%s\n", show_instruction(insn)); 559 } END_FOR_EACH_PTR(insn); 560 if (!bb_terminated(bb)) 561 printf("\tEND\n"); 562} 563 564/// 565// show BB of non-removed instruction 566void show_insn_bb(struct instruction *insn) 567{ 568 if (!insn || !insn->bb) 569 return; 570 show_bb(insn->bb); 571} 572 573static void show_symbol_usage(pseudo_t pseudo) 574{ 575 struct pseudo_user *pu; 576 577 if (pseudo) { 578 FOR_EACH_PTR(pseudo->users, pu) { 579 printf("\t%s\n", show_instruction(pu->insn)); 580 } END_FOR_EACH_PTR(pu); 581 } 582} 583 584void show_entry(struct entrypoint *ep) 585{ 586 struct symbol *sym; 587 struct basic_block *bb; 588 589 printf("%s:\n", show_ident(ep->name->ident)); 590 591 if (verbose) { 592 printf("ep %p: %s\n", ep, show_ident(ep->name->ident)); 593 594 FOR_EACH_PTR(ep->syms, sym) { 595 if (!sym->pseudo) 596 continue; 597 if (!sym->pseudo->users) 598 continue; 599 printf(" sym: %p %s\n", sym, show_ident(sym->ident)); 600 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE)) 601 printf("\texternal visibility\n"); 602 show_symbol_usage(sym->pseudo); 603 } END_FOR_EACH_PTR(sym); 604 605 printf("\n"); 606 } 607 608 FOR_EACH_PTR(ep->bbs, bb) { 609 if (!bb) 610 continue; 611 if (!bb->parents && !bb->children && !bb->insns && verbose < 2) 612 continue; 613 show_bb(bb); 614 printf("\n"); 615 } END_FOR_EACH_PTR(bb); 616 617 printf("\n"); 618} 619 620/// 621// show the function containing the instruction but only if not already removed. 622void show_insn_entry(struct instruction *insn) 623{ 624 if (!insn || !insn->bb || !insn->bb->ep) 625 return; 626 show_entry(insn->bb->ep); 627} 628 629static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos) 630{ 631 if (label->bb_target) 632 warning(pos, "label '%s' already bound", show_ident(label->ident)); 633 label->bb_target = bb; 634} 635 636static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label) 637{ 638 struct basic_block *bb = label->bb_target; 639 640 if (!bb) { 641 bb = alloc_basic_block(ep, label->pos); 642 label->bb_target = bb; 643 } 644 return bb; 645} 646 647static void finish_block(struct entrypoint *ep) 648{ 649 struct basic_block *src = ep->active; 650 if (bb_reachable(src)) 651 ep->active = NULL; 652} 653 654static void add_goto(struct entrypoint *ep, struct basic_block *dst) 655{ 656 struct basic_block *src = ep->active; 657 if (bb_reachable(src)) { 658 struct instruction *br = alloc_instruction(OP_BR, 0); 659 br->bb_true = dst; 660 add_bb(&dst->parents, src); 661 add_bb(&src->children, dst); 662 br->bb = src; 663 add_instruction(&src->insns, br); 664 ep->active = NULL; 665 } 666} 667 668static void add_one_insn(struct entrypoint *ep, struct instruction *insn) 669{ 670 struct basic_block *bb = ep->active; 671 672 if (bb_reachable(bb)) { 673 insn->bb = bb; 674 add_instruction(&bb->insns, insn); 675 } 676} 677 678static void add_unreachable(struct entrypoint *ep) 679{ 680 struct instruction *insn = alloc_instruction(OP_UNREACH, 0); 681 add_one_insn(ep, insn); 682 ep->active = NULL; 683} 684 685static void set_activeblock(struct entrypoint *ep, struct basic_block *bb) 686{ 687 if (!bb_terminated(ep->active)) 688 add_goto(ep, bb); 689 690 ep->active = bb; 691 if (bb_reachable(bb)) 692 add_bb(&ep->bbs, bb); 693} 694 695void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t if_true, pseudo_t if_false) 696{ 697 pseudo_t target; 698 struct instruction *select; 699 700 select = alloc_typed_instruction(OP_SEL, phi_node->type); 701 702 assert(br->cond); 703 use_pseudo(select, br->cond, &select->src1); 704 705 target = phi_node->target; 706 assert(target->def == phi_node); 707 select->target = target; 708 target->def = select; 709 710 use_pseudo(select, if_true, &select->src2); 711 use_pseudo(select, if_false, &select->src3); 712 713 insert_last_instruction(bb, select); 714} 715 716static inline int bb_empty(struct basic_block *bb) 717{ 718 return !bb->insns; 719} 720 721/* Add a label to the currently active block, return new active block */ 722static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label) 723{ 724 struct basic_block *bb = label->bb_target; 725 726 if (bb) { 727 set_activeblock(ep, bb); 728 return bb; 729 } 730 bb = ep->active; 731 if (!bb_reachable(bb) || !bb_empty(bb)) { 732 bb = alloc_basic_block(ep, label->pos); 733 set_activeblock(ep, bb); 734 } 735 label->bb_target = bb; 736 return bb; 737} 738 739static void add_branch(struct entrypoint *ep, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false) 740{ 741 struct basic_block *bb = ep->active; 742 struct instruction *br; 743 744 if (bb_reachable(bb)) { 745 br = alloc_instruction(OP_CBR, 0); 746 use_pseudo(br, cond, &br->cond); 747 br->bb_true = bb_true; 748 br->bb_false = bb_false; 749 add_bb(&bb_true->parents, bb); 750 add_bb(&bb_false->parents, bb); 751 add_bb(&bb->children, bb_true); 752 add_bb(&bb->children, bb_false); 753 add_one_insn(ep, br); 754 } 755} 756 757pseudo_t alloc_pseudo(struct instruction *def) 758{ 759 static int nr = 0; 760 struct pseudo * pseudo = __alloc_pseudo(0); 761 pseudo->type = PSEUDO_REG; 762 pseudo->nr = ++nr; 763 pseudo->def = def; 764 return pseudo; 765} 766 767static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym) 768{ 769 pseudo_t pseudo; 770 771 if (!sym) 772 return VOID; 773 774 pseudo = sym->pseudo; 775 if (!pseudo) { 776 pseudo = __alloc_pseudo(0); 777 pseudo->nr = -1; 778 pseudo->type = PSEUDO_SYM; 779 pseudo->sym = sym; 780 pseudo->ident = sym->ident; 781 sym->pseudo = pseudo; 782 add_pseudo(&ep->accesses, pseudo); 783 } 784 /* Symbol pseudos have neither nr nor def */ 785 return pseudo; 786} 787 788pseudo_t value_pseudo(long long val) 789{ 790#define MAX_VAL_HASH 64 791 static struct pseudo_list *prev[MAX_VAL_HASH]; 792 int hash = val & (MAX_VAL_HASH-1); 793 struct pseudo_list **list = prev + hash; 794 pseudo_t pseudo; 795 796 FOR_EACH_PTR(*list, pseudo) { 797 if (pseudo->value == val) 798 return pseudo; 799 } END_FOR_EACH_PTR(pseudo); 800 801 pseudo = __alloc_pseudo(0); 802 pseudo->type = PSEUDO_VAL; 803 pseudo->value = val; 804 add_pseudo(list, pseudo); 805 806 /* Value pseudos have neither nr, usage nor def */ 807 return pseudo; 808} 809 810pseudo_t undef_pseudo(void) 811{ 812 pseudo_t pseudo = __alloc_pseudo(0); 813 pseudo->type = PSEUDO_UNDEF; 814 return pseudo; 815} 816 817static pseudo_t argument_pseudo(struct entrypoint *ep, int nr) 818{ 819 pseudo_t pseudo = __alloc_pseudo(0); 820 struct instruction *entry = ep->entry; 821 822 pseudo->type = PSEUDO_ARG; 823 pseudo->nr = nr; 824 pseudo->def = entry; 825 add_pseudo(&entry->arg_list, pseudo); 826 827 /* Argument pseudos have neither usage nor def */ 828 return pseudo; 829} 830 831struct instruction *alloc_phisrc(pseudo_t pseudo, struct symbol *type) 832{ 833 struct instruction *insn = alloc_typed_instruction(OP_PHISOURCE, type); 834 pseudo_t phi = __alloc_pseudo(0); 835 static int nr = 0; 836 837 phi->type = PSEUDO_PHI; 838 phi->nr = ++nr; 839 phi->def = insn; 840 841 use_pseudo(insn, pseudo, &insn->phi_src); 842 insn->target = phi; 843 return insn; 844} 845 846pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, struct symbol *type) 847{ 848 struct instruction *insn; 849 850 if (!source) 851 return VOID; 852 853 insn = alloc_phisrc(pseudo, type); 854 insn->bb = source; 855 add_instruction(&source->insns, insn); 856 return insn->target; 857} 858 859struct instruction *alloc_phi_node(struct basic_block *bb, struct symbol *type, struct ident *ident) 860{ 861 struct instruction *phi_node = alloc_typed_instruction(OP_PHI, type); 862 pseudo_t phi; 863 864 phi = alloc_pseudo(phi_node); 865 phi->ident = ident; 866 phi->def = phi_node; 867 phi_node->target = phi; 868 phi_node->bb = bb; 869 return phi_node; 870} 871 872void add_phi_node(struct basic_block *bb, struct instruction *phi_node) 873{ 874 struct instruction *insn; 875 876 FOR_EACH_PTR(bb->insns, insn) { 877 enum opcode op = insn->opcode; 878 if (op == OP_PHI) 879 continue; 880 INSERT_CURRENT(phi_node, insn); 881 return; 882 } END_FOR_EACH_PTR(insn); 883 884 // FIXME 885 add_instruction(&bb->insns, phi_node); 886} 887 888struct instruction *insert_phi_node(struct basic_block *bb, struct symbol *var) 889{ 890 struct instruction *phi_node = alloc_phi_node(bb, var, var->ident); 891 add_phi_node(bb, phi_node); 892 return phi_node; 893} 894 895/* 896 * We carry the "access_data" structure around for any accesses, 897 * which simplifies things a lot. It contains all the access 898 * information in one place. 899 */ 900struct access_data { 901 struct symbol *type; // ctype 902 struct symbol *btype; // base type of bitfields 903 pseudo_t address; // pseudo containing address .. 904 long long offset; // byte offset 905}; 906 907static int linearize_simple_address(struct entrypoint *ep, 908 struct expression *addr, 909 struct access_data *ad) 910{ 911 if (addr->type == EXPR_SYMBOL) { 912 linearize_one_symbol(ep, addr->symbol); 913 ad->address = symbol_pseudo(ep, addr->symbol); 914 return 1; 915 } 916 if (addr->type == EXPR_BINOP) { 917 if (addr->right->type == EXPR_VALUE) { 918 if (addr->op == '+') { 919 ad->offset += get_expression_value(addr->right); 920 return linearize_simple_address(ep, addr->left, ad); 921 } 922 } 923 } 924 ad->address = linearize_expression(ep, addr); 925 return 1; 926} 927 928static struct symbol *bitfield_base_type(struct symbol *sym) 929{ 930 struct symbol *base = sym; 931 932 if (sym) { 933 if (sym->type == SYM_NODE) 934 base = base->ctype.base_type; 935 if (base->type == SYM_BITFIELD) { 936 base = base->ctype.base_type; 937 if (sym->packed) { 938 int size = bits_to_bytes(sym->bit_offset + sym->bit_size); 939 sym = __alloc_symbol(0); 940 *sym = *base; 941 sym->bit_size = bytes_to_bits(size); 942 return sym; 943 } 944 return base; 945 } 946 } 947 return sym; 948} 949 950static int linearize_address_gen(struct entrypoint *ep, 951 struct expression *expr, 952 struct access_data *ad) 953{ 954 struct symbol *ctype = expr->ctype; 955 956 if (!ctype) 957 return 0; 958 ad->type = ctype; 959 if (expr->type == EXPR_PREOP && expr->op == '*') 960 return linearize_simple_address(ep, expr->unop, ad); 961 962 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type); 963 return 0; 964} 965 966static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad) 967{ 968 struct instruction *insn; 969 pseudo_t new; 970 971 if (!ep->active) 972 return VOID; 973 974 insn = alloc_typed_instruction(OP_LOAD, ad->btype); 975 new = alloc_pseudo(insn); 976 977 insn->target = new; 978 insn->offset = ad->offset; 979 insn->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE); 980 use_pseudo(insn, ad->address, &insn->src); 981 add_one_insn(ep, insn); 982 return new; 983} 984 985static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value) 986{ 987 struct basic_block *bb = ep->active; 988 struct instruction *store; 989 990 if (!bb) 991 return; 992 993 store = alloc_typed_instruction(OP_STORE, ad->btype); 994 store->offset = ad->offset; 995 store->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE); 996 use_pseudo(store, value, &store->target); 997 use_pseudo(store, ad->address, &store->src); 998 add_one_insn(ep, store); 999} 1000 1001static pseudo_t linearize_bitfield_insert(struct entrypoint *ep, 1002 pseudo_t ori, pseudo_t val, struct symbol *ctype, struct symbol *btype) 1003{ 1004 unsigned int shift = ctype->bit_offset; 1005 unsigned int size = ctype->bit_size; 1006 unsigned long long mask = ((1ULL << size) - 1); 1007 unsigned long long smask= bits_mask(btype->bit_size); 1008 1009 val = add_cast(ep, btype, ctype, OP_ZEXT, val); 1010 if (shift) { 1011 val = add_binary_op(ep, btype, OP_SHL, val, value_pseudo(shift)); 1012 mask <<= shift; 1013 } 1014 ori = add_binary_op(ep, btype, OP_AND, ori, value_pseudo(~mask & smask)); 1015 val = add_binary_op(ep, btype, OP_OR, ori, val); 1016 1017 return val; 1018} 1019 1020static pseudo_t linearize_store_gen(struct entrypoint *ep, 1021 pseudo_t value, 1022 struct access_data *ad) 1023{ 1024 struct symbol *ctype = ad->type; 1025 struct symbol *btype; 1026 pseudo_t store = value; 1027 1028 if (!ep->active) 1029 return VOID; 1030 1031 btype = ad->btype = bitfield_base_type(ctype); 1032 if (type_size(btype) != type_size(ctype)) { 1033 pseudo_t orig = add_load(ep, ad); 1034 store = linearize_bitfield_insert(ep, orig, value, ctype, btype); 1035 } 1036 add_store(ep, ad, store); 1037 return value; 1038} 1039 1040static void taint_undefined_behaviour(struct instruction *insn) 1041{ 1042 pseudo_t src2; 1043 1044 switch (insn->opcode) { 1045 case OP_LSR: 1046 case OP_ASR: 1047 case OP_SHL: 1048 src2 = insn->src2; 1049 if (src2->type != PSEUDO_VAL) 1050 break; 1051 if ((unsigned long long)src2->value >= insn->size) 1052 insn->tainted = 1; 1053 break; 1054 } 1055} 1056 1057static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right) 1058{ 1059 struct instruction *insn = alloc_typed_instruction(op, ctype); 1060 pseudo_t target = alloc_pseudo(insn); 1061 insn->target = target; 1062 use_pseudo(insn, left, &insn->src1); 1063 use_pseudo(insn, right, &insn->src2); 1064 add_one_insn(ep, insn); 1065 return target; 1066} 1067 1068static pseudo_t add_cmp_op(struct entrypoint *ep, struct symbol *ctype, int op, struct symbol *itype, pseudo_t left, pseudo_t right) 1069{ 1070 pseudo_t target = add_binary_op(ep, ctype, op, left, right); 1071 target->def->itype = itype; 1072 return target; 1073} 1074 1075static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val) 1076{ 1077 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype); 1078 pseudo_t target = alloc_pseudo(insn); 1079 insn->target = target; 1080 insn->val = val; 1081 add_one_insn(ep, insn); 1082 return target; 1083} 1084 1085static pseudo_t add_setfval(struct entrypoint *ep, struct symbol *ctype, long double fval) 1086{ 1087 struct instruction *insn = alloc_typed_instruction(OP_SETFVAL, ctype); 1088 pseudo_t target = alloc_pseudo(insn); 1089 insn->target = target; 1090 insn->fvalue = fval; 1091 add_one_insn(ep, insn); 1092 return target; 1093} 1094 1095static pseudo_t add_symbol_address(struct entrypoint *ep, struct expression *expr) 1096{ 1097 struct instruction *insn = alloc_typed_instruction(OP_SYMADDR, expr->ctype); 1098 pseudo_t target = alloc_pseudo(insn); 1099 1100 insn->target = target; 1101 use_pseudo(insn, symbol_pseudo(ep, expr->symbol), &insn->src); 1102 add_one_insn(ep, insn); 1103 return target; 1104} 1105 1106static pseudo_t linearize_bitfield_extract(struct entrypoint *ep, 1107 pseudo_t val, struct symbol *ctype, struct symbol *btype) 1108{ 1109 unsigned int off = ctype->bit_offset; 1110 1111 if (off) { 1112 pseudo_t shift = value_pseudo(off); 1113 val = add_binary_op(ep, btype, OP_LSR, val, shift); 1114 } 1115 val = cast_pseudo(ep, val, btype, ctype); 1116 return val; 1117} 1118 1119static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad) 1120{ 1121 struct symbol *ctype = ad->type; 1122 struct symbol *btype; 1123 pseudo_t new; 1124 1125 if (!ep->active) 1126 return VOID; 1127 1128 btype = ad->btype = bitfield_base_type(ctype); 1129 new = add_load(ep, ad); 1130 if (ctype->bit_size != type_size(btype)) 1131 new = linearize_bitfield_extract(ep, new, ctype, btype); 1132 return new; 1133} 1134 1135static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr) 1136{ 1137 struct access_data ad = { NULL, }; 1138 pseudo_t value; 1139 1140 if (!linearize_address_gen(ep, expr, &ad)) 1141 return VOID; 1142 value = linearize_load_gen(ep, &ad); 1143 return value; 1144} 1145 1146static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop) 1147{ 1148 struct access_data ad = { NULL, }; 1149 pseudo_t old, new, one; 1150 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB; 1151 1152 if (!linearize_address_gen(ep, expr->unop, &ad)) 1153 return VOID; 1154 1155 old = linearize_load_gen(ep, &ad); 1156 op = opcode_float(op, expr->ctype); 1157 if (is_float_type(expr->ctype)) 1158 one = add_setfval(ep, expr->ctype, expr->op_value); 1159 else 1160 one = value_pseudo(expr->op_value); 1161 if (ad.btype != ad.type) 1162 old = cast_pseudo(ep, old, ad.type, ad.btype); 1163 new = add_binary_op(ep, ad.btype, op, old, one); 1164 if (ad.btype != ad.type) 1165 new = cast_pseudo(ep, new, ad.btype, ad.type); 1166 linearize_store_gen(ep, new, &ad); 1167 return postop ? old : new; 1168} 1169 1170static pseudo_t add_unop(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t src) 1171{ 1172 struct instruction *insn = alloc_typed_instruction(op, ctype); 1173 pseudo_t new = alloc_pseudo(insn); 1174 1175 insn->target = new; 1176 use_pseudo(insn, src, &insn->src1); 1177 add_one_insn(ep, insn); 1178 return new; 1179} 1180 1181static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to, 1182 struct symbol *from, int op, pseudo_t src) 1183{ 1184 pseudo_t new = add_unop(ep, to, op, src); 1185 new->def->orig_type = from; 1186 return new; 1187} 1188 1189static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr) 1190{ 1191 pseudo_t pre = linearize_expression(ep, expr->base); 1192 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype); 1193 pseudo_t new = alloc_pseudo(insn); 1194 1195 insn->target = new; 1196 insn->from = expr->r_bitpos; 1197 insn->orig_type = expr->base->ctype; 1198 use_pseudo(insn, pre, &insn->src); 1199 add_one_insn(ep, insn); 1200 return new; 1201} 1202 1203static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr) 1204{ 1205 pseudo_t pre = linearize_expression(ep, expr->unop); 1206 struct symbol *ctype = expr->ctype; 1207 switch (expr->op) { 1208 case '+': 1209 return pre; 1210 case '!': { 1211 pseudo_t zero = value_pseudo(0); 1212 return add_cmp_op(ep, ctype, OP_SET_EQ, expr->unop->ctype, pre, zero); 1213 } 1214 case '~': 1215 return add_unop(ep, ctype, OP_NOT, pre); 1216 case '-': 1217 return add_unop(ep, ctype, opcode_float(OP_NEG, ctype), pre); 1218 } 1219 return VOID; 1220} 1221 1222static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr) 1223{ 1224 /* 1225 * '*' is an lvalue access, and is fundamentally different 1226 * from an arithmetic operation. Maybe it should have an 1227 * expression type of its own.. 1228 */ 1229 if (expr->op == '*') 1230 return linearize_access(ep, expr); 1231 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT) 1232 return linearize_inc_dec(ep, expr, 0); 1233 return linearize_regular_preop(ep, expr); 1234} 1235 1236static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr) 1237{ 1238 return linearize_inc_dec(ep, expr, 1); 1239} 1240 1241/* 1242 * Casts to pointers are "less safe" than other casts, since 1243 * they imply type-unsafe accesses. "void *" is a special 1244 * case, since you can't access through it anyway without another 1245 * cast. 1246 */ 1247enum mtype { 1248 MTYPE_UINT, 1249 MTYPE_SINT, 1250 MTYPE_PTR, 1251 MTYPE_VPTR, // TODO: must be removed ? 1252 MTYPE_FLOAT, 1253 MTYPE_BAD, 1254}; 1255 1256static enum mtype get_mtype(struct symbol *s) 1257{ 1258 int sign = (s->ctype.modifiers & MOD_SIGNED) ? 1 : 0; 1259 1260retry: switch (s->type) { 1261 case SYM_NODE: 1262 s = s->ctype.base_type; 1263 goto retry; 1264 case SYM_PTR: 1265 if (s->ctype.base_type == &void_ctype) 1266 return MTYPE_VPTR; 1267 return MTYPE_PTR; 1268 case SYM_BITFIELD: 1269 case SYM_RESTRICT: 1270 case SYM_FOULED: 1271 case SYM_ENUM: 1272 s = s->ctype.base_type; 1273 /* fall-through */ 1274 case_int: 1275 return sign ? MTYPE_SINT : MTYPE_UINT; 1276 case SYM_BASETYPE: 1277 if (s->ctype.base_type == &fp_type) 1278 return MTYPE_FLOAT; 1279 if (s->ctype.base_type == &int_type) 1280 goto case_int; 1281 /* fall-through */ 1282 default: 1283 return MTYPE_BAD; 1284 } 1285} 1286 1287static int get_cast_opcode(struct symbol *dst, struct symbol *src) 1288{ 1289 enum mtype stype = get_mtype(src); 1290 enum mtype dtype = get_mtype(dst); 1291 1292 switch (dtype) { 1293 case MTYPE_FLOAT: 1294 switch (stype) { 1295 case MTYPE_FLOAT: 1296 if (dst->bit_size == src->bit_size) 1297 return OP_NOP; 1298 return OP_FCVTF; 1299 case MTYPE_UINT: 1300 return OP_UCVTF; 1301 case MTYPE_SINT: 1302 return OP_SCVTF; 1303 default: 1304 return OP_BADOP; 1305 } 1306 case MTYPE_PTR: 1307 switch (stype) { 1308 case MTYPE_UINT: 1309 case MTYPE_SINT: 1310 return OP_UTPTR; 1311 case MTYPE_PTR: 1312 case MTYPE_VPTR: 1313 return OP_PTRCAST; 1314 default: 1315 return OP_BADOP; 1316 } 1317 case MTYPE_VPTR: 1318 switch (stype) { 1319 case MTYPE_PTR: 1320 case MTYPE_VPTR: 1321 case MTYPE_UINT: 1322 stype = MTYPE_UINT; 1323 /* fall through */ 1324 case MTYPE_SINT: 1325 break; 1326 default: 1327 return OP_BADOP; 1328 } 1329 /* fall through */ 1330 case MTYPE_UINT: 1331 case MTYPE_SINT: 1332 switch (stype) { 1333 case MTYPE_FLOAT: 1334 return dtype == MTYPE_UINT ? OP_FCVTU : OP_FCVTS; 1335 case MTYPE_PTR: 1336 return OP_PTRTU; 1337 case MTYPE_VPTR: 1338 case MTYPE_UINT: 1339 case MTYPE_SINT: 1340 if (dst->bit_size ==src->bit_size) 1341 return OP_NOP; 1342 if (dst->bit_size < src->bit_size) 1343 return OP_TRUNC; 1344 return stype == MTYPE_SINT ? OP_SEXT : OP_ZEXT; 1345 default: 1346 return OP_BADOP; 1347 } 1348 /* fall through */ 1349 default: 1350 if (src->type == SYM_NODE) 1351 src = src->ctype.base_type; 1352 if (dst->type == SYM_NODE) 1353 dst = dst->ctype.base_type; 1354 if (src == dst) 1355 return OP_NOP; 1356 return OP_BADOP; 1357 } 1358} 1359 1360static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to) 1361{ 1362 const struct position pos = current_pos; 1363 pseudo_t result; 1364 struct instruction *insn; 1365 int opcode; 1366 1367 if (src == VOID) 1368 return VOID; 1369 if (!from || !to) 1370 return VOID; 1371 if (from->bit_size < 0 || to->bit_size < 0) 1372 return VOID; 1373 opcode = get_cast_opcode(to, from); 1374 switch (opcode) { 1375 case OP_NOP: 1376 return src; 1377 case OP_UTPTR: 1378 if (from->bit_size == to->bit_size) 1379 break; 1380 if (src == value_pseudo(0)) 1381 break; 1382 if (Wint_to_pointer_cast) 1383 warning(pos, "non size-preserving integer to pointer cast"); 1384 src = cast_pseudo(ep, src, from, size_t_ctype); 1385 from = size_t_ctype; 1386 break; 1387 case OP_PTRTU: 1388 if (from->bit_size == to->bit_size) 1389 break; 1390 if (Wpointer_to_int_cast) 1391 warning(pos, "non size-preserving pointer to integer cast"); 1392 src = cast_pseudo(ep, src, from, size_t_ctype); 1393 return cast_pseudo(ep, src, size_t_ctype, to); 1394 case OP_BADOP: 1395 return VOID; 1396 default: 1397 break; 1398 } 1399 insn = alloc_typed_instruction(opcode, to); 1400 result = alloc_pseudo(insn); 1401 insn->target = result; 1402 insn->orig_type = from; 1403 use_pseudo(insn, src, &insn->src); 1404 add_one_insn(ep, insn); 1405 return result; 1406} 1407 1408static int map_opcode(int opcode, struct symbol *ctype) 1409{ 1410 if (ctype && is_float_type(ctype)) 1411 return opcode_table[opcode].to_float; 1412 if (ctype && (ctype->ctype.modifiers & MOD_SIGNED)) { 1413 switch(opcode) { 1414 case OP_DIVU: case OP_MODU: case OP_LSR: 1415 opcode++; 1416 } 1417 } 1418 return opcode; 1419} 1420 1421static inline pseudo_t add_convert_to_bool(struct entrypoint *ep, pseudo_t src, struct symbol *type) 1422{ 1423 pseudo_t zero; 1424 int op; 1425 1426 if (!type || src == VOID) 1427 return VOID; 1428 if (is_bool_type(type)) 1429 return src; 1430 if (src->type == PSEUDO_VAL && (src->value == 0 || src->value == 1)) 1431 return src; 1432 if (is_float_type(type)) { 1433 zero = add_setfval(ep, type, 0.0); 1434 op = map_opcode(OP_SET_NE, type); 1435 } else { 1436 zero = value_pseudo(0); 1437 op = OP_SET_NE; 1438 } 1439 return add_cmp_op(ep, &bool_ctype, op, type, src, zero); 1440} 1441 1442static pseudo_t linearize_expression_to_bool(struct entrypoint *ep, struct expression *expr) 1443{ 1444 pseudo_t dst; 1445 dst = linearize_expression(ep, expr); 1446 dst = add_convert_to_bool(ep, dst, expr->ctype); 1447 return dst; 1448} 1449 1450static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr) 1451{ 1452 struct access_data ad = { NULL, }; 1453 struct expression *target = expr->left; 1454 struct expression *src = expr->right; 1455 struct symbol *ctype; 1456 pseudo_t value; 1457 1458 value = linearize_expression(ep, src); 1459 if (!target || !linearize_address_gen(ep, target, &ad)) 1460 return value; 1461 if (expr->op != '=') { 1462 pseudo_t oldvalue = linearize_load_gen(ep, &ad); 1463 pseudo_t dst; 1464 static const int op_trans[] = { 1465 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD, 1466 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB, 1467 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL, 1468 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIVU, 1469 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MODU, 1470 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL, 1471 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_LSR, 1472 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND, 1473 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR, 1474 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR 1475 }; 1476 int opcode; 1477 1478 if (!src) 1479 return VOID; 1480 1481 ctype = src->ctype; 1482 oldvalue = cast_pseudo(ep, oldvalue, target->ctype, ctype); 1483 opcode = map_opcode(op_trans[expr->op - SPECIAL_BASE], ctype); 1484 dst = add_binary_op(ep, ctype, opcode, oldvalue, value); 1485 taint_undefined_behaviour(dst->def); 1486 value = cast_pseudo(ep, dst, ctype, expr->ctype); 1487 } 1488 value = linearize_store_gen(ep, value, &ad); 1489 return value; 1490} 1491 1492static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr) 1493{ 1494 struct expression *arg, *fn; 1495 struct instruction *insn; 1496 pseudo_t retval, call; 1497 struct ctype *ctype = NULL; 1498 struct symbol *fntype; 1499 struct context *context; 1500 1501 if (!expr->ctype) 1502 return VOID; 1503 1504 fn = expr->fn; 1505 fntype = fn->ctype; 1506 1507 // handle builtins 1508 if (fntype->op && fntype->op->linearize) { 1509 retval = fntype->op->linearize(ep, expr); 1510 if (retval) 1511 return retval; 1512 } 1513 1514 ctype = &fntype->ctype; 1515 1516 insn = alloc_typed_instruction(OP_CALL, expr->ctype); 1517 add_symbol(&insn->fntypes, fntype); 1518 FOR_EACH_PTR(expr->args, arg) { 1519 pseudo_t new = linearize_expression(ep, arg); 1520 use_pseudo(insn, new, add_pseudo(&insn->arguments, new)); 1521 add_symbol(&insn->fntypes, arg->ctype); 1522 } END_FOR_EACH_PTR(arg); 1523 1524 if (fn->type == EXPR_PREOP && fn->op == '*' && is_func_type(fn->ctype)) 1525 fn = fn->unop; 1526 1527 if (fn->type == EXPR_SYMBOL) { 1528 call = symbol_pseudo(ep, fn->symbol); 1529 } else { 1530 call = linearize_expression(ep, fn); 1531 } 1532 use_pseudo(insn, call, &insn->func); 1533 retval = VOID; 1534 if (expr->ctype != &void_ctype) 1535 retval = alloc_pseudo(insn); 1536 insn->target = retval; 1537 add_one_insn(ep, insn); 1538 1539 if (ctype) { 1540 FOR_EACH_PTR(ctype->contexts, context) { 1541 int in = context->in; 1542 int out = context->out; 1543 int check = 0; 1544 int context_diff; 1545 if (in < 0) { 1546 check = 1; 1547 in = 0; 1548 } 1549 if (out < 0) { 1550 check = 0; 1551 out = 0; 1552 } 1553 context_diff = out - in; 1554 if (check || context_diff) { 1555 insn = alloc_instruction(OP_CONTEXT, 0); 1556 insn->increment = context_diff; 1557 insn->check = check; 1558 insn->context_expr = context->context; 1559 add_one_insn(ep, insn); 1560 } 1561 } END_FOR_EACH_PTR(context); 1562 1563 if (ctype->modifiers & MOD_NORETURN) 1564 add_unreachable(ep); 1565 } 1566 1567 return retval; 1568} 1569 1570static pseudo_t linearize_binop_bool(struct entrypoint *ep, struct expression *expr) 1571{ 1572 pseudo_t src1, src2, dst; 1573 int op = (expr->op == SPECIAL_LOGICAL_OR) ? OP_OR : OP_AND; 1574 1575 src1 = linearize_expression_to_bool(ep, expr->left); 1576 src2 = linearize_expression_to_bool(ep, expr->right); 1577 dst = add_binary_op(ep, &bool_ctype, op, src1, src2); 1578 if (expr->ctype != &bool_ctype) 1579 dst = cast_pseudo(ep, dst, &bool_ctype, expr->ctype); 1580 return dst; 1581} 1582 1583static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr) 1584{ 1585 pseudo_t src1, src2, dst; 1586 static const int opcode[] = { 1587 ['+'] = OP_ADD, ['-'] = OP_SUB, 1588 ['*'] = OP_MUL, ['/'] = OP_DIVU, 1589 ['%'] = OP_MODU, ['&'] = OP_AND, 1590 ['|'] = OP_OR, ['^'] = OP_XOR, 1591 [SPECIAL_LEFTSHIFT] = OP_SHL, 1592 [SPECIAL_RIGHTSHIFT] = OP_LSR, 1593 }; 1594 int op; 1595 1596 src1 = linearize_expression(ep, expr->left); 1597 src2 = linearize_expression(ep, expr->right); 1598 op = map_opcode(opcode[expr->op], expr->ctype); 1599 dst = add_binary_op(ep, expr->ctype, op, src1, src2); 1600 taint_undefined_behaviour(dst->def); 1601 return dst; 1602} 1603 1604static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false); 1605 1606static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false); 1607 1608static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr) 1609{ 1610 pseudo_t cond, valt, valf, res; 1611 struct instruction *insn; 1612 1613 valt = linearize_expression(ep, expr->cond_true); 1614 valf = linearize_expression(ep, expr->cond_false); 1615 cond = linearize_expression(ep, expr->conditional); 1616 1617 insn = alloc_typed_instruction(OP_SEL, expr->ctype); 1618 if (!expr->cond_true) 1619 valt = cond; 1620 use_pseudo(insn, cond, &insn->src1); 1621 use_pseudo(insn, valt, &insn->src2); 1622 use_pseudo(insn, valf, &insn->src3); 1623 1624 res = alloc_pseudo(insn); 1625 insn->target = res; 1626 add_one_insn(ep, insn); 1627 return res; 1628} 1629 1630static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr, 1631 pseudo_t phi1, pseudo_t phi2) 1632{ 1633 pseudo_t target; 1634 struct instruction *phi_node; 1635 1636 if (phi1 == VOID) 1637 return (phi2 == VOID) ? phi2 : phi2->def->src; 1638 if (phi2 == VOID) 1639 return (phi1 == VOID) ? phi1 : phi1->def->src; 1640 1641 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype); 1642 link_phi(phi_node, phi1); 1643 link_phi(phi_node, phi2); 1644 phi_node->target = target = alloc_pseudo(phi_node); 1645 add_one_insn(ep, phi_node); 1646 return target; 1647} 1648 1649static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr, 1650 struct expression *cond, 1651 struct expression *expr_false) 1652{ 1653 pseudo_t src1, src2; 1654 struct basic_block *bb_false; 1655 struct basic_block *merge; 1656 pseudo_t phi1, phi2; 1657 1658 if (!expr_false || !ep->active) 1659 return VOID; 1660 1661 bb_false = alloc_basic_block(ep, expr_false->pos); 1662 merge = alloc_basic_block(ep, expr->pos); 1663 1664 src1 = linearize_expression(ep, cond); 1665 phi1 = alloc_phi(ep->active, src1, expr->ctype); 1666 add_branch(ep, src1, merge, bb_false); 1667 1668 set_activeblock(ep, bb_false); 1669 src2 = linearize_expression(ep, expr_false); 1670 phi2 = alloc_phi(ep->active, src2, expr->ctype); 1671 set_activeblock(ep, merge); 1672 1673 return add_join_conditional(ep, expr, phi1, phi2); 1674} 1675 1676static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr, 1677 struct expression *cond, 1678 struct expression *expr_true, 1679 struct expression *expr_false) 1680{ 1681 pseudo_t src1, src2; 1682 pseudo_t phi1, phi2; 1683 struct basic_block *bb_true, *bb_false, *merge; 1684 1685 if (!cond || !expr_true || !expr_false || !ep->active) 1686 return VOID; 1687 bb_true = alloc_basic_block(ep, expr_true->pos); 1688 bb_false = alloc_basic_block(ep, expr_false->pos); 1689 merge = alloc_basic_block(ep, expr->pos); 1690 1691 linearize_cond_branch(ep, cond, bb_true, bb_false); 1692 1693 set_activeblock(ep, bb_true); 1694 src1 = linearize_expression(ep, expr_true); 1695 phi1 = alloc_phi(ep->active, src1, expr->ctype); 1696 add_goto(ep, merge); 1697 1698 set_activeblock(ep, bb_false); 1699 src2 = linearize_expression(ep, expr_false); 1700 phi2 = alloc_phi(ep->active, src2, expr->ctype); 1701 set_activeblock(ep, merge); 1702 1703 return add_join_conditional(ep, expr, phi1, phi2); 1704} 1705 1706static void insert_phis(struct basic_block *bb, pseudo_t src, struct symbol *ctype, 1707 struct instruction *node) 1708{ 1709 struct basic_block *parent; 1710 1711 FOR_EACH_PTR(bb->parents, parent) { 1712 struct instruction *phisrc = alloc_phisrc(src, ctype); 1713 insert_last_instruction(parent, phisrc); 1714 link_phi(node, phisrc->target); 1715 } END_FOR_EACH_PTR(parent); 1716} 1717 1718static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr) 1719{ 1720 struct symbol *ctype = expr->ctype; 1721 struct basic_block *other, *merge; 1722 struct instruction *node; 1723 pseudo_t src1, src2, phi2; 1724 1725 if (!ep->active || !expr->left || !expr->right) 1726 return VOID; 1727 1728 other = alloc_basic_block(ep, expr->right->pos); 1729 merge = alloc_basic_block(ep, expr->pos); 1730 node = alloc_phi_node(merge, ctype, NULL); 1731 1732 // LHS and its shortcut 1733 if (expr->op == SPECIAL_LOGICAL_OR) { 1734 linearize_cond_branch(ep, expr->left, merge, other); 1735 src1 = value_pseudo(1); 1736 } else { 1737 linearize_cond_branch(ep, expr->left, other, merge); 1738 src1 = value_pseudo(0); 1739 } 1740 insert_phis(merge, src1, ctype, node); 1741 1742 // RHS 1743 set_activeblock(ep, other); 1744 src2 = linearize_expression_to_bool(ep, expr->right); 1745 src2 = cast_pseudo(ep, src2, &bool_ctype, ctype); 1746 phi2 = alloc_phi(ep->active, src2, ctype); 1747 link_phi(node, phi2); 1748 1749 // join 1750 set_activeblock(ep, merge); 1751 add_instruction(&merge->insns, node); 1752 return node->target; 1753} 1754 1755static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr) 1756{ 1757 static const int cmpop[] = { 1758 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT, 1759 [SPECIAL_EQUAL] = OP_SET_EQ, 1760 [SPECIAL_NOTEQUAL] = OP_SET_NE, 1761 [SPECIAL_GTE] = OP_SET_GE, 1762 [SPECIAL_LTE] = OP_SET_LE, 1763 [SPECIAL_UNSIGNED_LT] = OP_SET_B, 1764 [SPECIAL_UNSIGNED_GT] = OP_SET_A, 1765 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE, 1766 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE, 1767 }; 1768 struct symbol *itype = expr->right->ctype; 1769 int op = opcode_float(cmpop[expr->op], itype); 1770 pseudo_t src1 = linearize_expression(ep, expr->left); 1771 pseudo_t src2 = linearize_expression(ep, expr->right); 1772 pseudo_t dst = add_cmp_op(ep, expr->ctype, op, itype, src1, src2); 1773 return dst; 1774} 1775 1776 1777static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false) 1778{ 1779 pseudo_t cond; 1780 1781 if (!expr || !valid_type(expr->ctype) || !bb_reachable(ep->active)) 1782 return VOID; 1783 1784 switch (expr->type) { 1785 1786 case EXPR_STRING: 1787 case EXPR_VALUE: 1788 add_goto(ep, expr->value ? bb_true : bb_false); 1789 break; 1790 case EXPR_FVALUE: 1791 add_goto(ep, expr->fvalue ? bb_true : bb_false); 1792 break; 1793 case EXPR_LOGICAL: 1794 linearize_logical_branch(ep, expr, bb_true, bb_false); 1795 break; 1796 case EXPR_COMPARE: 1797 cond = linearize_compare(ep, expr); 1798 add_branch(ep, cond, bb_true, bb_false); 1799 break; 1800 case EXPR_PREOP: 1801 if (expr->op == '!') 1802 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true); 1803 /* fall through */ 1804 default: 1805 cond = linearize_expression_to_bool(ep, expr); 1806 add_branch(ep, cond, bb_true, bb_false); 1807 break; 1808 } 1809 return VOID; 1810} 1811 1812 1813static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false) 1814{ 1815 struct basic_block *next = alloc_basic_block(ep, expr->pos); 1816 1817 if (expr->op == SPECIAL_LOGICAL_OR) 1818 linearize_cond_branch(ep, expr->left, bb_true, next); 1819 else 1820 linearize_cond_branch(ep, expr->left, next, bb_false); 1821 set_activeblock(ep, next); 1822 linearize_cond_branch(ep, expr->right, bb_true, bb_false); 1823 return VOID; 1824} 1825 1826static pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr) 1827{ 1828 pseudo_t src; 1829 struct expression *orig = expr->cast_expression; 1830 1831 if (!orig) 1832 return VOID; 1833 1834 src = linearize_expression(ep, orig); 1835 return cast_pseudo(ep, src, orig->ctype, expr->ctype); 1836} 1837 1838static pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad) 1839{ 1840 switch (initializer->type) { 1841 case EXPR_INITIALIZER: { 1842 struct expression *expr; 1843 FOR_EACH_PTR(initializer->expr_list, expr) { 1844 linearize_initializer(ep, expr, ad); 1845 } END_FOR_EACH_PTR(expr); 1846 break; 1847 } 1848 case EXPR_POS: 1849 ad->offset = initializer->init_offset; 1850 linearize_initializer(ep, initializer->init_expr, ad); 1851 break; 1852 default: { 1853 pseudo_t value = linearize_expression(ep, initializer); 1854 ad->type = initializer->ctype; 1855 linearize_store_gen(ep, value, ad); 1856 return value; 1857 } 1858 } 1859 1860 return VOID; 1861} 1862 1863static void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr) 1864{ 1865 struct access_data ad = { NULL, }; 1866 1867 ad.type = arg; 1868 ad.address = symbol_pseudo(ep, arg); 1869 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad); 1870} 1871 1872static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr) 1873{ 1874 if (!expr || !valid_type(expr->ctype)) 1875 return VOID; 1876 1877 current_pos = expr->pos; 1878 switch (expr->type) { 1879 case EXPR_SYMBOL: 1880 linearize_one_symbol(ep, expr->symbol); 1881 return add_symbol_address(ep, expr); 1882 1883 case EXPR_VALUE: 1884 return value_pseudo(expr->value); 1885 1886 case EXPR_STRING: 1887 case EXPR_LABEL: 1888 return add_setval(ep, expr->ctype, expr); 1889 1890 case EXPR_FVALUE: 1891 return add_setfval(ep, expr->ctype, expr->fvalue); 1892 1893 case EXPR_STATEMENT: 1894 return linearize_statement(ep, expr->statement); 1895 1896 case EXPR_CALL: 1897 return linearize_call_expression(ep, expr); 1898 1899 case EXPR_BINOP: 1900 if (expr->op == SPECIAL_LOGICAL_AND || expr->op == SPECIAL_LOGICAL_OR) 1901 return linearize_binop_bool(ep, expr); 1902 return linearize_binop(ep, expr); 1903 1904 case EXPR_LOGICAL: 1905 return linearize_logical(ep, expr); 1906 1907 case EXPR_COMPARE: 1908 return linearize_compare(ep, expr); 1909 1910 case EXPR_SELECT: 1911 return linearize_select(ep, expr); 1912 1913 case EXPR_CONDITIONAL: 1914 if (!expr->cond_true) 1915 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false); 1916 1917 return linearize_conditional(ep, expr, expr->conditional, 1918 expr->cond_true, expr->cond_false); 1919 1920 case EXPR_COMMA: 1921 linearize_expression(ep, expr->left); 1922 return linearize_expression(ep, expr->right); 1923 1924 case EXPR_ASSIGNMENT: 1925 return linearize_assignment(ep, expr); 1926 1927 case EXPR_PREOP: 1928 return linearize_preop(ep, expr); 1929 1930 case EXPR_POSTOP: 1931 return linearize_postop(ep, expr); 1932 1933 case EXPR_CAST: 1934 case EXPR_FORCE_CAST: 1935 case EXPR_IMPLIED_CAST: 1936 return linearize_cast(ep, expr); 1937 1938 case EXPR_SLICE: 1939 return linearize_slice(ep, expr); 1940 1941 case EXPR_INITIALIZER: 1942 case EXPR_POS: 1943 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op); 1944 return VOID; 1945 default: 1946 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op); 1947 return VOID; 1948 } 1949 return VOID; 1950} 1951 1952static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym) 1953{ 1954 struct access_data ad = { NULL, }; 1955 pseudo_t value; 1956 1957 if (!sym || !sym->initializer || sym->initialized) 1958 return VOID; 1959 1960 /* We need to output these puppies some day too.. */ 1961 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL)) 1962 return VOID; 1963 1964 sym->initialized = 1; 1965 ad.address = symbol_pseudo(ep, sym); 1966 1967 if (sym->initializer && !is_scalar_type(sym)) { 1968 // default zero initialization [6.7.9.21] 1969 // FIXME: this init the whole aggregate while 1970 // only the existing fields need to be initialized. 1971 // FIXME: this init the whole aggregate even if 1972 // all fields arelater explicitely initialized. 1973 ad.type = sym; 1974 ad.address = symbol_pseudo(ep, sym); 1975 linearize_store_gen(ep, value_pseudo(0), &ad); 1976 } 1977 1978 value = linearize_initializer(ep, sym->initializer, &ad); 1979 return value; 1980} 1981 1982static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt) 1983{ 1984 pseudo_t pseudo; 1985 struct statement *s; 1986 1987 pseudo = VOID; 1988 FOR_EACH_PTR(stmt->stmts, s) { 1989 pseudo = linearize_statement(ep, s); 1990 } END_FOR_EACH_PTR(s); 1991 1992 return pseudo; 1993} 1994 1995static void add_return(struct entrypoint *ep, struct basic_block *bb, struct symbol *ctype, pseudo_t src) 1996{ 1997 struct instruction *phi_node = first_instruction(bb->insns); 1998 pseudo_t phi; 1999 if (!phi_node) { 2000 phi_node = alloc_typed_instruction(OP_PHI, ctype); 2001 phi_node->target = alloc_pseudo(phi_node); 2002 phi_node->bb = bb; 2003 add_instruction(&bb->insns, phi_node); 2004 } 2005 phi = alloc_phi(ep->active, src, ctype); 2006 phi->ident = &return_ident; 2007 link_phi(phi_node, phi); 2008} 2009 2010static pseudo_t linearize_fn_statement(struct entrypoint *ep, struct statement *stmt) 2011{ 2012 struct instruction *phi_node; 2013 struct basic_block *bb; 2014 pseudo_t pseudo; 2015 2016 pseudo = linearize_compound_statement(ep, stmt); 2017 if (!is_void_type(stmt->ret)) { // non-void function 2018 struct basic_block *active = ep->active; 2019 if (active && !bb_terminated(active)) { // missing return 2020 struct basic_block *bb_ret; 2021 bb_ret = get_bound_block(ep, stmt->ret); 2022 add_return(ep, bb_ret, stmt->ret, undef_pseudo()); 2023 } 2024 } 2025 bb = add_label(ep, stmt->ret); 2026 phi_node = first_instruction(bb->insns); 2027 if (phi_node) 2028 pseudo = phi_node->target; 2029 return pseudo; 2030} 2031 2032static pseudo_t linearize_inlined_call(struct entrypoint *ep, struct statement *stmt) 2033{ 2034 struct instruction *insn = alloc_instruction(OP_INLINED_CALL, 0); 2035 struct statement *args = stmt->args; 2036 struct basic_block *bb; 2037 pseudo_t pseudo; 2038 2039 if (args) { 2040 struct symbol *sym; 2041 2042 concat_symbol_list(args->declaration, &ep->syms); 2043 FOR_EACH_PTR(args->declaration, sym) { 2044 pseudo_t value = linearize_one_symbol(ep, sym); 2045 add_pseudo(&insn->arguments, value); 2046 } END_FOR_EACH_PTR(sym); 2047 } 2048 2049 pseudo = linearize_fn_statement(ep, stmt); 2050 insn->target = pseudo; 2051 2052 insn->func = symbol_pseudo(ep, stmt->inline_fn); 2053 bb = ep->active; 2054 if (!bb->insns) 2055 bb->pos = stmt->pos; 2056 add_one_insn(ep, insn); 2057 return pseudo; 2058} 2059 2060static pseudo_t linearize_context(struct entrypoint *ep, struct statement *stmt) 2061{ 2062 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0); 2063 struct expression *expr = stmt->expression; 2064 2065 insn->increment = get_expression_value(expr); 2066 insn->context_expr = stmt->context; 2067 add_one_insn(ep, insn); 2068 return VOID; 2069} 2070 2071static pseudo_t linearize_range(struct entrypoint *ep, struct statement *stmt) 2072{ 2073 struct instruction *insn = alloc_instruction(OP_RANGE, 0); 2074 2075 use_pseudo(insn, linearize_expression(ep, stmt->range_expression), &insn->src1); 2076 use_pseudo(insn, linearize_expression(ep, stmt->range_low), &insn->src2); 2077 use_pseudo(insn, linearize_expression(ep, stmt->range_high), &insn->src3); 2078 add_one_insn(ep, insn); 2079 return VOID; 2080} 2081 2082ALLOCATOR(asm_rules, "asm rules"); 2083ALLOCATOR(asm_constraint, "asm constraints"); 2084 2085static void add_asm_rule(struct instruction *insn, struct asm_constraint_list **list, struct asm_operand *op, pseudo_t pseudo) 2086{ 2087 struct asm_constraint *rule = __alloc_asm_constraint(0); 2088 rule->is_memory = op->is_memory; 2089 rule->ident = op->name; 2090 rule->constraint = op->constraint ? op->constraint->string->data : ""; 2091 use_pseudo(insn, pseudo, &rule->pseudo); 2092 add_ptr_list(list, rule); 2093} 2094 2095static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op) 2096{ 2097 pseudo_t pseudo = linearize_expression(ep, op->expr); 2098 2099 add_asm_rule(insn, &insn->asm_rules->inputs, op, pseudo); 2100} 2101 2102static void add_asm_output_address(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op) 2103{ 2104 pseudo_t pseudo; 2105 2106 if (!op->is_memory) 2107 return; 2108 2109 pseudo = linearize_expression(ep, op->expr); 2110 add_asm_rule(insn, &insn->asm_rules->outputs, op, pseudo); 2111 insn->output_memory = 1; 2112} 2113 2114static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op) 2115{ 2116 struct access_data ad = { NULL, }; 2117 pseudo_t pseudo; 2118 2119 if (op->is_memory) 2120 return; 2121 2122 if (!linearize_address_gen(ep, op->expr, &ad)) 2123 return; 2124 pseudo = alloc_pseudo(insn); 2125 linearize_store_gen(ep, pseudo, &ad); 2126 2127 add_asm_rule(insn, &insn->asm_rules->outputs, op, pseudo); 2128} 2129 2130static pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt) 2131{ 2132 struct instruction *insn; 2133 struct expression *expr, *clob; 2134 struct asm_rules *rules; 2135 struct asm_operand *op; 2136 2137 insn = alloc_instruction(OP_ASM, 0); 2138 expr = stmt->asm_string; 2139 if (!expr || expr->type != EXPR_STRING) { 2140 warning(stmt->pos, "expected string in inline asm"); 2141 return VOID; 2142 } 2143 insn->string = expr->string->data; 2144 2145 rules = __alloc_asm_rules(0); 2146 insn->asm_rules = rules; 2147 2148 /* Gather the inputs.. */ 2149 FOR_EACH_PTR(stmt->asm_inputs, op) { 2150 add_asm_input(ep, insn, op); 2151 } END_FOR_EACH_PTR(op); 2152 2153 /* ... and the addresses for memory outputs */ 2154 FOR_EACH_PTR(stmt->asm_outputs, op) { 2155 add_asm_output_address(ep, insn, op); 2156 } END_FOR_EACH_PTR(op); 2157 2158 add_one_insn(ep, insn); 2159 2160 /* Assign the outputs */ 2161 FOR_EACH_PTR(stmt->asm_outputs, op) { 2162 add_asm_output(ep, insn, op); 2163 } END_FOR_EACH_PTR(op); 2164 2165 /* and finally, look if it clobbers memory */ 2166 FOR_EACH_PTR(stmt->asm_clobbers, clob) { 2167 if (!strcmp(clob->string->data, "memory")) 2168 insn->clobber_memory = 1; 2169 } END_FOR_EACH_PTR(clob); 2170 2171 return VOID; 2172} 2173 2174static int multijmp_cmp(const void *_a, const void *_b) 2175{ 2176 const struct multijmp *a = _a; 2177 const struct multijmp *b = _b; 2178 2179 // "default" case? 2180 if (a->begin > a->end) { 2181 if (b->begin > b->end) 2182 return 0; 2183 return 1; 2184 } 2185 if (b->begin > b->end) 2186 return -1; 2187 if (a->begin == b->begin) { 2188 if (a->end == b->end) 2189 return 0; 2190 return (a->end < b->end) ? -1 : 1; 2191 } 2192 return a->begin < b->begin ? -1 : 1; 2193} 2194 2195static void sort_switch_cases(struct instruction *insn) 2196{ 2197 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp); 2198} 2199 2200static pseudo_t linearize_declaration(struct entrypoint *ep, struct statement *stmt) 2201{ 2202 struct symbol *sym; 2203 2204 concat_symbol_list(stmt->declaration, &ep->syms); 2205 2206 FOR_EACH_PTR(stmt->declaration, sym) { 2207 linearize_one_symbol(ep, sym); 2208 } END_FOR_EACH_PTR(sym); 2209 return VOID; 2210} 2211 2212static pseudo_t linearize_return(struct entrypoint *ep, struct statement *stmt) 2213{ 2214 struct expression *expr = stmt->expression; 2215 struct symbol *ret = stmt->ret_target; 2216 struct basic_block *bb_return = get_bound_block(ep, ret); 2217 struct basic_block *active; 2218 pseudo_t src = linearize_expression(ep, expr); 2219 active = ep->active; 2220 if (active && !is_void_type(ret)) { 2221 add_return(ep, bb_return, ret, src); 2222 } 2223 add_goto(ep, bb_return); 2224 return VOID; 2225} 2226 2227static pseudo_t linearize_switch(struct entrypoint *ep, struct statement *stmt) 2228{ 2229 struct symbol *sym; 2230 struct instruction *switch_ins; 2231 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos); 2232 struct basic_block *active, *default_case; 2233 struct expression *expr = stmt->switch_expression; 2234 struct multijmp *jmp; 2235 pseudo_t pseudo; 2236 2237 if (!expr || !expr->ctype) 2238 return VOID; 2239 pseudo = linearize_expression(ep, expr); 2240 active = ep->active; 2241 if (!active) { 2242 active = alloc_basic_block(ep, stmt->pos); 2243 set_activeblock(ep, active); 2244 } 2245 2246 switch_ins = alloc_typed_instruction(OP_SWITCH, expr->ctype); 2247 use_pseudo(switch_ins, pseudo, &switch_ins->cond); 2248 add_one_insn(ep, switch_ins); 2249 finish_block(ep); 2250 2251 default_case = NULL; 2252 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) { 2253 struct statement *case_stmt = sym->stmt; 2254 struct basic_block *bb_case = get_bound_block(ep, sym); 2255 2256 if (!case_stmt->case_expression) { 2257 default_case = bb_case; 2258 continue; 2259 } else if (case_stmt->case_expression->type != EXPR_VALUE) { 2260 continue; 2261 } else { 2262 struct expression *case_to = case_stmt->case_to; 2263 long long begin, end; 2264 2265 begin = end = case_stmt->case_expression->value; 2266 if (case_to && case_to->type == EXPR_VALUE) 2267 end = case_to->value; 2268 if (begin > end) 2269 jmp = alloc_multijmp(bb_case, end, begin); 2270 else 2271 jmp = alloc_multijmp(bb_case, begin, end); 2272 2273 } 2274 add_multijmp(&switch_ins->multijmp_list, jmp); 2275 add_bb(&bb_case->parents, active); 2276 add_bb(&active->children, bb_case); 2277 } END_FOR_EACH_PTR(sym); 2278 2279 bind_label(stmt->switch_break, switch_end, stmt->pos); 2280 2281 /* And linearize the actual statement */ 2282 linearize_statement(ep, stmt->switch_statement); 2283 set_activeblock(ep, switch_end); 2284 2285 if (!default_case) 2286 default_case = switch_end; 2287 2288 jmp = alloc_multijmp(default_case, 1, 0); 2289 add_multijmp(&switch_ins->multijmp_list, jmp); 2290 add_bb(&default_case->parents, active); 2291 add_bb(&active->children, default_case); 2292 sort_switch_cases(switch_ins); 2293 2294 return VOID; 2295} 2296 2297static pseudo_t linearize_iterator(struct entrypoint *ep, struct statement *stmt) 2298{ 2299 struct statement *pre_statement = stmt->iterator_pre_statement; 2300 struct expression *pre_condition = stmt->iterator_pre_condition; 2301 struct statement *statement = stmt->iterator_statement; 2302 struct statement *post_statement = stmt->iterator_post_statement; 2303 struct expression *post_condition = stmt->iterator_post_condition; 2304 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end; 2305 struct symbol *sym; 2306 2307 FOR_EACH_PTR(stmt->iterator_syms, sym) { 2308 linearize_one_symbol(ep, sym); 2309 } END_FOR_EACH_PTR(sym); 2310 concat_symbol_list(stmt->iterator_syms, &ep->syms); 2311 linearize_statement(ep, pre_statement); 2312 2313 loop_body = loop_top = alloc_basic_block(ep, stmt->pos); 2314 loop_continue = alloc_basic_block(ep, stmt->pos); 2315 loop_end = alloc_basic_block(ep, stmt->pos); 2316 2317 /* An empty post-condition means that it's the same as the pre-condition */ 2318 if (!post_condition) { 2319 loop_top = alloc_basic_block(ep, stmt->pos); 2320 set_activeblock(ep, loop_top); 2321 } 2322 2323 if (pre_condition) 2324 linearize_cond_branch(ep, pre_condition, loop_body, loop_end); 2325 2326 bind_label(stmt->iterator_continue, loop_continue, stmt->pos); 2327 bind_label(stmt->iterator_break, loop_end, stmt->pos); 2328 2329 set_activeblock(ep, loop_body); 2330 linearize_statement(ep, statement); 2331 add_goto(ep, loop_continue); 2332 2333 set_activeblock(ep, loop_continue); 2334 linearize_statement(ep, post_statement); 2335 if (!post_condition) 2336 add_goto(ep, loop_top); 2337 else 2338 linearize_cond_branch(ep, post_condition, loop_top, loop_end); 2339 set_activeblock(ep, loop_end); 2340 2341 return VOID; 2342} 2343 2344static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt) 2345{ 2346 struct basic_block *bb; 2347 2348 if (!stmt) 2349 return VOID; 2350 2351 bb = ep->active; 2352 if (bb && !bb->insns) 2353 bb->pos = stmt->pos; 2354 current_pos = stmt->pos; 2355 2356 switch (stmt->type) { 2357 case STMT_NONE: 2358 break; 2359 2360 case STMT_DECLARATION: 2361 return linearize_declaration(ep, stmt); 2362 2363 case STMT_CONTEXT: 2364 return linearize_context(ep, stmt); 2365 2366 case STMT_RANGE: 2367 return linearize_range(ep, stmt); 2368 2369 case STMT_EXPRESSION: 2370 return linearize_expression(ep, stmt->expression); 2371 2372 case STMT_ASM: 2373 return linearize_asm_statement(ep, stmt); 2374 2375 case STMT_RETURN: 2376 return linearize_return(ep, stmt); 2377 2378 case STMT_CASE: { 2379 add_label(ep, stmt->case_label); 2380 linearize_statement(ep, stmt->case_statement); 2381 break; 2382 } 2383 2384 case STMT_LABEL: { 2385 struct symbol *label = stmt->label_identifier; 2386 2387 if (label->used) { 2388 add_label(ep, label); 2389 } 2390 return linearize_statement(ep, stmt->label_statement); 2391 } 2392 2393 case STMT_GOTO: { 2394 struct symbol *sym; 2395 struct expression *expr; 2396 struct instruction *goto_ins; 2397 struct basic_block *active; 2398 pseudo_t pseudo; 2399 2400 active = ep->active; 2401 if (!bb_reachable(active)) 2402 break; 2403 2404 if (stmt->goto_label) { 2405 add_goto(ep, get_bound_block(ep, stmt->goto_label)); 2406 break; 2407 } 2408 2409 expr = stmt->goto_expression; 2410 if (!expr) 2411 break; 2412 2413 /* This can happen as part of simplification */ 2414 if (expr->type == EXPR_LABEL) { 2415 add_goto(ep, get_bound_block(ep, expr->label_symbol)); 2416 break; 2417 } 2418 2419 pseudo = linearize_expression(ep, expr); 2420 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0); 2421 use_pseudo(goto_ins, pseudo, &goto_ins->src); 2422 add_one_insn(ep, goto_ins); 2423 2424 FOR_EACH_PTR(stmt->target_list, sym) { 2425 struct basic_block *bb_computed = get_bound_block(ep, sym); 2426 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0); 2427 add_multijmp(&goto_ins->multijmp_list, jmp); 2428 add_bb(&bb_computed->parents, ep->active); 2429 add_bb(&active->children, bb_computed); 2430 } END_FOR_EACH_PTR(sym); 2431 2432 finish_block(ep); 2433 break; 2434 } 2435 2436 case STMT_COMPOUND: 2437 if (stmt->inline_fn) 2438 return linearize_inlined_call(ep, stmt); 2439 return linearize_compound_statement(ep, stmt); 2440 2441 /* 2442 * This could take 'likely/unlikely' into account, and 2443 * switch the arms around appropriately.. 2444 */ 2445 case STMT_IF: { 2446 struct basic_block *bb_true, *bb_false, *endif; 2447 struct expression *cond = stmt->if_conditional; 2448 2449 bb_true = alloc_basic_block(ep, stmt->pos); 2450 bb_false = endif = alloc_basic_block(ep, stmt->pos); 2451 2452 // If the condition is invalid, the following 2453 // statement(s) are not evaluated. 2454 if (!cond || !valid_type(cond->ctype)) 2455 return VOID; 2456 linearize_cond_branch(ep, cond, bb_true, bb_false); 2457 2458 set_activeblock(ep, bb_true); 2459 linearize_statement(ep, stmt->if_true); 2460 2461 if (stmt->if_false) { 2462 endif = alloc_basic_block(ep, stmt->pos); 2463 add_goto(ep, endif); 2464 set_activeblock(ep, bb_false); 2465 linearize_statement(ep, stmt->if_false); 2466 } 2467 set_activeblock(ep, endif); 2468 break; 2469 } 2470 2471 case STMT_SWITCH: 2472 return linearize_switch(ep, stmt); 2473 2474 case STMT_ITERATOR: 2475 return linearize_iterator(ep, stmt); 2476 2477 default: 2478 break; 2479 } 2480 return VOID; 2481} 2482 2483static void check_tainted_insn(struct instruction *insn) 2484{ 2485 unsigned long long uval; 2486 long long sval; 2487 pseudo_t src2; 2488 2489 switch (insn->opcode) { 2490 case OP_DIVU: case OP_DIVS: 2491 case OP_MODU: case OP_MODS: 2492 if (insn->src2 == value_pseudo(0)) 2493 warning(insn->pos, "divide by zero"); 2494 break; 2495 case OP_SHL: case OP_LSR: case OP_ASR: 2496 src2 = insn->src2; 2497 if (src2->type != PSEUDO_VAL) 2498 break; 2499 uval = src2->value; 2500 if (uval < insn->size) 2501 break; 2502 sval = sign_extend(uval, insn->size); 2503 if (Wshift_count_negative && sval < 0) 2504 warning(insn->pos, "shift count is negative (%lld)", sval); 2505 else if (Wshift_count_overflow) 2506 warning(insn->pos, "shift too big (%llu) for type %s", uval, show_typename(insn->type)); 2507 } 2508} 2509 2510/// 2511// issue warnings after all possible DCE 2512static void late_warnings(struct entrypoint *ep) 2513{ 2514 struct basic_block *bb; 2515 FOR_EACH_PTR(ep->bbs, bb) { 2516 struct instruction *insn; 2517 FOR_EACH_PTR(bb->insns, insn) { 2518 if (!insn->bb) 2519 continue; 2520 if (insn->tainted) 2521 check_tainted_insn(insn); 2522 switch (insn->opcode) { 2523 case OP_LOAD: 2524 // Check for illegal offsets. 2525 check_access(insn); 2526 break; 2527 } 2528 } END_FOR_EACH_PTR(insn); 2529 } END_FOR_EACH_PTR(bb); 2530} 2531 2532static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type) 2533{ 2534 struct statement *stmt = base_type->stmt; 2535 struct entrypoint *ep; 2536 struct basic_block *bb; 2537 struct symbol *ret_type; 2538 struct symbol *arg; 2539 struct instruction *entry; 2540 struct instruction *ret; 2541 pseudo_t result; 2542 int i; 2543 2544 if (!stmt || sym->bogus_linear) 2545 return NULL; 2546 2547 ep = alloc_entrypoint(); 2548 ep->name = sym; 2549 sym->ep = ep; 2550 bb = alloc_basic_block(ep, sym->pos); 2551 set_activeblock(ep, bb); 2552 2553 if (stmt->type == STMT_ASM) { // top-level asm 2554 linearize_asm_statement(ep, stmt); 2555 return ep; 2556 } 2557 2558 entry = alloc_instruction(OP_ENTRY, 0); 2559 add_one_insn(ep, entry); 2560 ep->entry = entry; 2561 2562 concat_symbol_list(base_type->arguments, &ep->syms); 2563 2564 /* FIXME!! We should do something else about varargs.. */ 2565 i = 0; 2566 FOR_EACH_PTR(base_type->arguments, arg) { 2567 linearize_argument(ep, arg, ++i); 2568 } END_FOR_EACH_PTR(arg); 2569 2570 result = linearize_fn_statement(ep, stmt); 2571 ret_type = base_type->ctype.base_type; 2572 ret = alloc_typed_instruction(OP_RET, ret_type); 2573 if (type_size(ret_type) > 0) 2574 use_pseudo(ret, result, &ret->src); 2575 add_one_insn(ep, ret); 2576 2577 optimize(ep); 2578 late_warnings(ep); 2579 return ep; 2580} 2581 2582struct entrypoint *linearize_symbol(struct symbol *sym) 2583{ 2584 struct symbol *base_type; 2585 2586 if (!sym) 2587 return NULL; 2588 current_pos = sym->pos; 2589 base_type = sym->ctype.base_type; 2590 if (!base_type) 2591 return NULL; 2592 if (base_type->type == SYM_FN) 2593 return linearize_fn(sym, base_type); 2594 return NULL; 2595} 2596 2597/* 2598 * Builtin functions 2599 */ 2600 2601static pseudo_t linearize_fma(struct entrypoint *ep, struct expression *expr) 2602{ 2603 struct instruction *insn = alloc_typed_instruction(OP_FMADD, expr->ctype); 2604 struct expression *arg; 2605 2606 PREPARE_PTR_LIST(expr->args, arg); 2607 use_pseudo(insn, linearize_expression(ep, arg), &insn->src1); 2608 NEXT_PTR_LIST(arg) 2609 use_pseudo(insn, linearize_expression(ep, arg), &insn->src2); 2610 NEXT_PTR_LIST(arg) 2611 use_pseudo(insn, linearize_expression(ep, arg), &insn->src3); 2612 FINISH_PTR_LIST(arg); 2613 2614 add_one_insn(ep, insn); 2615 return insn->target = alloc_pseudo(insn); 2616} 2617 2618static pseudo_t linearize_isdigit(struct entrypoint *ep, struct expression *expr) 2619{ 2620 struct instruction *insn; 2621 pseudo_t src; 2622 2623 insn = alloc_typed_instruction(OP_SUB, &int_ctype); 2624 src = linearize_expression(ep, first_expression(expr->args)); 2625 use_pseudo(insn, src, &insn->src1); 2626 insn->src2 = value_pseudo('0'); 2627 src = insn->target = alloc_pseudo(insn); 2628 add_one_insn(ep, insn); 2629 2630 insn = alloc_typed_instruction(OP_SET_BE, &int_ctype); 2631 use_pseudo(insn, src, &insn->src1); 2632 insn->src2 = value_pseudo(9); 2633 insn->target = alloc_pseudo(insn); 2634 insn->itype = &int_ctype; 2635 add_one_insn(ep, insn); 2636 2637 return insn->target; 2638} 2639 2640static pseudo_t linearize_unreachable(struct entrypoint *ep, struct expression *exp) 2641{ 2642 add_unreachable(ep); 2643 return VOID; 2644} 2645 2646static struct sym_init { 2647 const char *name; 2648 pseudo_t (*linearize)(struct entrypoint *, struct expression*); 2649 struct symbol_op op; 2650} builtins_table[] = { 2651 // must be declared in builtin.c:declare_builtins[] 2652 { "__builtin_fma", linearize_fma }, 2653 { "__builtin_fmaf", linearize_fma }, 2654 { "__builtin_fmal", linearize_fma }, 2655 { "__builtin_isdigit", linearize_isdigit }, 2656 { "__builtin_unreachable", linearize_unreachable }, 2657 { } 2658}; 2659 2660void init_linearized_builtins(int stream) 2661{ 2662 struct sym_init *ptr; 2663 2664 for (ptr = builtins_table; ptr->name; ptr++) { 2665 struct symbol *sym; 2666 sym = create_symbol(stream, ptr->name, SYM_NODE, NS_SYMBOL); 2667 if (!sym->op) 2668 sym->op = &ptr->op; 2669 sym->op->type |= KW_BUILTIN; 2670 sym->op->linearize = ptr->linearize; 2671 } 2672} 2673