1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * x86 instruction analysis 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004, 2009 6 */ 7 8#ifdef __KERNEL__ 9#include <linux/string.h> 10#else 11#include <string.h> 12#endif 13#include <asm/inat.h> /*__ignore_sync_check__ */ 14#include <asm/insn.h> /* __ignore_sync_check__ */ 15 16#include <linux/errno.h> 17#include <linux/kconfig.h> 18 19#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */ 20 21/* Verify next sizeof(t) bytes can be on the same instruction */ 22#define validate_next(t, insn, n) \ 23 ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) 24 25#define __get_next(t, insn) \ 26 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) 27 28#define __peek_nbyte_next(t, insn, n) \ 29 ({ t r = *(t*)((insn)->next_byte + n); r; }) 30 31#define get_next(t, insn) \ 32 ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) 33 34#define peek_nbyte_next(t, insn, n) \ 35 ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) 36 37#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) 38 39/** 40 * insn_init() - initialize struct insn 41 * @insn: &struct insn to be initialized 42 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 43 * @x86_64: !0 for 64-bit kernel or 64-bit app 44 */ 45void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) 46{ 47 /* 48 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid 49 * even if the input buffer is long enough to hold them. 50 */ 51 if (buf_len > MAX_INSN_SIZE) 52 buf_len = MAX_INSN_SIZE; 53 54 memset(insn, 0, sizeof(*insn)); 55 insn->kaddr = kaddr; 56 insn->end_kaddr = kaddr + buf_len; 57 insn->next_byte = kaddr; 58 insn->x86_64 = x86_64 ? 1 : 0; 59 insn->opnd_bytes = 4; 60 if (x86_64) 61 insn->addr_bytes = 8; 62 else 63 insn->addr_bytes = 4; 64} 65 66static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX }; 67static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX }; 68 69static int __insn_get_emulate_prefix(struct insn *insn, 70 const insn_byte_t *prefix, size_t len) 71{ 72 size_t i; 73 74 for (i = 0; i < len; i++) { 75 if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i]) 76 goto err_out; 77 } 78 79 insn->emulate_prefix_size = len; 80 insn->next_byte += len; 81 82 return 1; 83 84err_out: 85 return 0; 86} 87 88static void insn_get_emulate_prefix(struct insn *insn) 89{ 90 if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix))) 91 return; 92 93 __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix)); 94} 95 96/** 97 * insn_get_prefixes - scan x86 instruction prefix bytes 98 * @insn: &struct insn containing instruction 99 * 100 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte 101 * to point to the (first) opcode. No effect if @insn->prefixes.got 102 * is already set. 103 * 104 * * Returns: 105 * 0: on success 106 * < 0: on error 107 */ 108int insn_get_prefixes(struct insn *insn) 109{ 110 struct insn_field *prefixes = &insn->prefixes; 111 insn_attr_t attr; 112 insn_byte_t b, lb; 113 int i, nb; 114 115 if (prefixes->got) 116 return 0; 117 118 insn_get_emulate_prefix(insn); 119 120 nb = 0; 121 lb = 0; 122 b = peek_next(insn_byte_t, insn); 123 attr = inat_get_opcode_attribute(b); 124 while (inat_is_legacy_prefix(attr)) { 125 /* Skip if same prefix */ 126 for (i = 0; i < nb; i++) 127 if (prefixes->bytes[i] == b) 128 goto found; 129 if (nb == 4) 130 /* Invalid instruction */ 131 break; 132 prefixes->bytes[nb++] = b; 133 if (inat_is_address_size_prefix(attr)) { 134 /* address size switches 2/4 or 4/8 */ 135 if (insn->x86_64) 136 insn->addr_bytes ^= 12; 137 else 138 insn->addr_bytes ^= 6; 139 } else if (inat_is_operand_size_prefix(attr)) { 140 /* oprand size switches 2/4 */ 141 insn->opnd_bytes ^= 6; 142 } 143found: 144 prefixes->nbytes++; 145 insn->next_byte++; 146 lb = b; 147 b = peek_next(insn_byte_t, insn); 148 attr = inat_get_opcode_attribute(b); 149 } 150 /* Set the last prefix */ 151 if (lb && lb != insn->prefixes.bytes[3]) { 152 if (unlikely(insn->prefixes.bytes[3])) { 153 /* Swap the last prefix */ 154 b = insn->prefixes.bytes[3]; 155 for (i = 0; i < nb; i++) 156 if (prefixes->bytes[i] == lb) 157 prefixes->bytes[i] = b; 158 } 159 insn->prefixes.bytes[3] = lb; 160 } 161 162 /* Decode REX prefix */ 163 if (insn->x86_64) { 164 b = peek_next(insn_byte_t, insn); 165 attr = inat_get_opcode_attribute(b); 166 if (inat_is_rex_prefix(attr)) { 167 insn->rex_prefix.value = b; 168 insn->rex_prefix.nbytes = 1; 169 insn->next_byte++; 170 if (X86_REX_W(b)) 171 /* REX.W overrides opnd_size */ 172 insn->opnd_bytes = 8; 173 } 174 } 175 insn->rex_prefix.got = 1; 176 177 /* Decode VEX prefix */ 178 b = peek_next(insn_byte_t, insn); 179 attr = inat_get_opcode_attribute(b); 180 if (inat_is_vex_prefix(attr)) { 181 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); 182 if (!insn->x86_64) { 183 /* 184 * In 32-bits mode, if the [7:6] bits (mod bits of 185 * ModRM) on the second byte are not 11b, it is 186 * LDS or LES or BOUND. 187 */ 188 if (X86_MODRM_MOD(b2) != 3) 189 goto vex_end; 190 } 191 insn->vex_prefix.bytes[0] = b; 192 insn->vex_prefix.bytes[1] = b2; 193 if (inat_is_evex_prefix(attr)) { 194 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 195 insn->vex_prefix.bytes[2] = b2; 196 b2 = peek_nbyte_next(insn_byte_t, insn, 3); 197 insn->vex_prefix.bytes[3] = b2; 198 insn->vex_prefix.nbytes = 4; 199 insn->next_byte += 4; 200 if (insn->x86_64 && X86_VEX_W(b2)) 201 /* VEX.W overrides opnd_size */ 202 insn->opnd_bytes = 8; 203 } else if (inat_is_vex3_prefix(attr)) { 204 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 205 insn->vex_prefix.bytes[2] = b2; 206 insn->vex_prefix.nbytes = 3; 207 insn->next_byte += 3; 208 if (insn->x86_64 && X86_VEX_W(b2)) 209 /* VEX.W overrides opnd_size */ 210 insn->opnd_bytes = 8; 211 } else { 212 /* 213 * For VEX2, fake VEX3-like byte#2. 214 * Makes it easier to decode vex.W, vex.vvvv, 215 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. 216 */ 217 insn->vex_prefix.bytes[2] = b2 & 0x7f; 218 insn->vex_prefix.nbytes = 2; 219 insn->next_byte += 2; 220 } 221 } 222vex_end: 223 insn->vex_prefix.got = 1; 224 225 prefixes->got = 1; 226 227 return 0; 228 229err_out: 230 return -ENODATA; 231} 232 233/** 234 * insn_get_opcode - collect opcode(s) 235 * @insn: &struct insn containing instruction 236 * 237 * Populates @insn->opcode, updates @insn->next_byte to point past the 238 * opcode byte(s), and set @insn->attr (except for groups). 239 * If necessary, first collects any preceding (prefix) bytes. 240 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got 241 * is already 1. 242 * 243 * Returns: 244 * 0: on success 245 * < 0: on error 246 */ 247int insn_get_opcode(struct insn *insn) 248{ 249 struct insn_field *opcode = &insn->opcode; 250 int pfx_id, ret; 251 insn_byte_t op; 252 253 if (opcode->got) 254 return 0; 255 256 if (!insn->prefixes.got) { 257 ret = insn_get_prefixes(insn); 258 if (ret) 259 return ret; 260 } 261 262 /* Get first opcode */ 263 op = get_next(insn_byte_t, insn); 264 opcode->bytes[0] = op; 265 opcode->nbytes = 1; 266 267 /* Check if there is VEX prefix or not */ 268 if (insn_is_avx(insn)) { 269 insn_byte_t m, p; 270 m = insn_vex_m_bits(insn); 271 p = insn_vex_p_bits(insn); 272 insn->attr = inat_get_avx_attribute(op, m, p); 273 if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || 274 (!inat_accept_vex(insn->attr) && 275 !inat_is_group(insn->attr))) { 276 /* This instruction is bad */ 277 insn->attr = 0; 278 return -EINVAL; 279 } 280 /* VEX has only 1 byte for opcode */ 281 goto end; 282 } 283 284 insn->attr = inat_get_opcode_attribute(op); 285 while (inat_is_escape(insn->attr)) { 286 /* Get escaped opcode */ 287 op = get_next(insn_byte_t, insn); 288 opcode->bytes[opcode->nbytes++] = op; 289 pfx_id = insn_last_prefix_id(insn); 290 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); 291 } 292 293 if (inat_must_vex(insn->attr)) { 294 /* This instruction is bad */ 295 insn->attr = 0; 296 return -EINVAL; 297 } 298end: 299 opcode->got = 1; 300 return 0; 301 302err_out: 303 return -ENODATA; 304} 305 306/** 307 * insn_get_modrm - collect ModRM byte, if any 308 * @insn: &struct insn containing instruction 309 * 310 * Populates @insn->modrm and updates @insn->next_byte to point past the 311 * ModRM byte, if any. If necessary, first collects the preceding bytes 312 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 313 * 314 * Returns: 315 * 0: on success 316 * < 0: on error 317 */ 318int insn_get_modrm(struct insn *insn) 319{ 320 struct insn_field *modrm = &insn->modrm; 321 insn_byte_t pfx_id, mod; 322 int ret; 323 324 if (modrm->got) 325 return 0; 326 327 if (!insn->opcode.got) { 328 ret = insn_get_opcode(insn); 329 if (ret) 330 return ret; 331 } 332 333 if (inat_has_modrm(insn->attr)) { 334 mod = get_next(insn_byte_t, insn); 335 modrm->value = mod; 336 modrm->nbytes = 1; 337 if (inat_is_group(insn->attr)) { 338 pfx_id = insn_last_prefix_id(insn); 339 insn->attr = inat_get_group_attribute(mod, pfx_id, 340 insn->attr); 341 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { 342 /* Bad insn */ 343 insn->attr = 0; 344 return -EINVAL; 345 } 346 } 347 } 348 349 if (insn->x86_64 && inat_is_force64(insn->attr)) 350 insn->opnd_bytes = 8; 351 352 modrm->got = 1; 353 return 0; 354 355err_out: 356 return -ENODATA; 357} 358 359 360/** 361 * insn_rip_relative() - Does instruction use RIP-relative addressing mode? 362 * @insn: &struct insn containing instruction 363 * 364 * If necessary, first collects the instruction up to and including the 365 * ModRM byte. No effect if @insn->x86_64 is 0. 366 */ 367int insn_rip_relative(struct insn *insn) 368{ 369 struct insn_field *modrm = &insn->modrm; 370 int ret; 371 372 if (!insn->x86_64) 373 return 0; 374 375 if (!modrm->got) { 376 ret = insn_get_modrm(insn); 377 if (ret) 378 return 0; 379 } 380 /* 381 * For rip-relative instructions, the mod field (top 2 bits) 382 * is zero and the r/m field (bottom 3 bits) is 0x5. 383 */ 384 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); 385} 386 387/** 388 * insn_get_sib() - Get the SIB byte of instruction 389 * @insn: &struct insn containing instruction 390 * 391 * If necessary, first collects the instruction up to and including the 392 * ModRM byte. 393 * 394 * Returns: 395 * 0: if decoding succeeded 396 * < 0: otherwise. 397 */ 398int insn_get_sib(struct insn *insn) 399{ 400 insn_byte_t modrm; 401 int ret; 402 403 if (insn->sib.got) 404 return 0; 405 406 if (!insn->modrm.got) { 407 ret = insn_get_modrm(insn); 408 if (ret) 409 return ret; 410 } 411 412 if (insn->modrm.nbytes) { 413 modrm = (insn_byte_t)insn->modrm.value; 414 if (insn->addr_bytes != 2 && 415 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { 416 insn->sib.value = get_next(insn_byte_t, insn); 417 insn->sib.nbytes = 1; 418 } 419 } 420 insn->sib.got = 1; 421 422 return 0; 423 424err_out: 425 return -ENODATA; 426} 427 428 429/** 430 * insn_get_displacement() - Get the displacement of instruction 431 * @insn: &struct insn containing instruction 432 * 433 * If necessary, first collects the instruction up to and including the 434 * SIB byte. 435 * Displacement value is sign-expanded. 436 * 437 * * Returns: 438 * 0: if decoding succeeded 439 * < 0: otherwise. 440 */ 441int insn_get_displacement(struct insn *insn) 442{ 443 insn_byte_t mod, rm, base; 444 int ret; 445 446 if (insn->displacement.got) 447 return 0; 448 449 if (!insn->sib.got) { 450 ret = insn_get_sib(insn); 451 if (ret) 452 return ret; 453 } 454 455 if (insn->modrm.nbytes) { 456 /* 457 * Interpreting the modrm byte: 458 * mod = 00 - no displacement fields (exceptions below) 459 * mod = 01 - 1-byte displacement field 460 * mod = 10 - displacement field is 4 bytes, or 2 bytes if 461 * address size = 2 (0x67 prefix in 32-bit mode) 462 * mod = 11 - no memory operand 463 * 464 * If address size = 2... 465 * mod = 00, r/m = 110 - displacement field is 2 bytes 466 * 467 * If address size != 2... 468 * mod != 11, r/m = 100 - SIB byte exists 469 * mod = 00, SIB base = 101 - displacement field is 4 bytes 470 * mod = 00, r/m = 101 - rip-relative addressing, displacement 471 * field is 4 bytes 472 */ 473 mod = X86_MODRM_MOD(insn->modrm.value); 474 rm = X86_MODRM_RM(insn->modrm.value); 475 base = X86_SIB_BASE(insn->sib.value); 476 if (mod == 3) 477 goto out; 478 if (mod == 1) { 479 insn->displacement.value = get_next(signed char, insn); 480 insn->displacement.nbytes = 1; 481 } else if (insn->addr_bytes == 2) { 482 if ((mod == 0 && rm == 6) || mod == 2) { 483 insn->displacement.value = 484 get_next(short, insn); 485 insn->displacement.nbytes = 2; 486 } 487 } else { 488 if ((mod == 0 && rm == 5) || mod == 2 || 489 (mod == 0 && base == 5)) { 490 insn->displacement.value = get_next(int, insn); 491 insn->displacement.nbytes = 4; 492 } 493 } 494 } 495out: 496 insn->displacement.got = 1; 497 return 0; 498 499err_out: 500 return -ENODATA; 501} 502 503/* Decode moffset16/32/64. Return 0 if failed */ 504static int __get_moffset(struct insn *insn) 505{ 506 switch (insn->addr_bytes) { 507 case 2: 508 insn->moffset1.value = get_next(short, insn); 509 insn->moffset1.nbytes = 2; 510 break; 511 case 4: 512 insn->moffset1.value = get_next(int, insn); 513 insn->moffset1.nbytes = 4; 514 break; 515 case 8: 516 insn->moffset1.value = get_next(int, insn); 517 insn->moffset1.nbytes = 4; 518 insn->moffset2.value = get_next(int, insn); 519 insn->moffset2.nbytes = 4; 520 break; 521 default: /* opnd_bytes must be modified manually */ 522 goto err_out; 523 } 524 insn->moffset1.got = insn->moffset2.got = 1; 525 526 return 1; 527 528err_out: 529 return 0; 530} 531 532/* Decode imm v32(Iz). Return 0 if failed */ 533static int __get_immv32(struct insn *insn) 534{ 535 switch (insn->opnd_bytes) { 536 case 2: 537 insn->immediate.value = get_next(short, insn); 538 insn->immediate.nbytes = 2; 539 break; 540 case 4: 541 case 8: 542 insn->immediate.value = get_next(int, insn); 543 insn->immediate.nbytes = 4; 544 break; 545 default: /* opnd_bytes must be modified manually */ 546 goto err_out; 547 } 548 549 return 1; 550 551err_out: 552 return 0; 553} 554 555/* Decode imm v64(Iv/Ov), Return 0 if failed */ 556static int __get_immv(struct insn *insn) 557{ 558 switch (insn->opnd_bytes) { 559 case 2: 560 insn->immediate1.value = get_next(short, insn); 561 insn->immediate1.nbytes = 2; 562 break; 563 case 4: 564 insn->immediate1.value = get_next(int, insn); 565 insn->immediate1.nbytes = 4; 566 break; 567 case 8: 568 insn->immediate1.value = get_next(int, insn); 569 insn->immediate1.nbytes = 4; 570 insn->immediate2.value = get_next(int, insn); 571 insn->immediate2.nbytes = 4; 572 break; 573 default: /* opnd_bytes must be modified manually */ 574 goto err_out; 575 } 576 insn->immediate1.got = insn->immediate2.got = 1; 577 578 return 1; 579err_out: 580 return 0; 581} 582 583/* Decode ptr16:16/32(Ap) */ 584static int __get_immptr(struct insn *insn) 585{ 586 switch (insn->opnd_bytes) { 587 case 2: 588 insn->immediate1.value = get_next(short, insn); 589 insn->immediate1.nbytes = 2; 590 break; 591 case 4: 592 insn->immediate1.value = get_next(int, insn); 593 insn->immediate1.nbytes = 4; 594 break; 595 case 8: 596 /* ptr16:64 is not exist (no segment) */ 597 return 0; 598 default: /* opnd_bytes must be modified manually */ 599 goto err_out; 600 } 601 insn->immediate2.value = get_next(unsigned short, insn); 602 insn->immediate2.nbytes = 2; 603 insn->immediate1.got = insn->immediate2.got = 1; 604 605 return 1; 606err_out: 607 return 0; 608} 609 610/** 611 * insn_get_immediate() - Get the immediate in an instruction 612 * @insn: &struct insn containing instruction 613 * 614 * If necessary, first collects the instruction up to and including the 615 * displacement bytes. 616 * Basically, most of immediates are sign-expanded. Unsigned-value can be 617 * computed by bit masking with ((1 << (nbytes * 8)) - 1) 618 * 619 * Returns: 620 * 0: on success 621 * < 0: on error 622 */ 623int insn_get_immediate(struct insn *insn) 624{ 625 int ret; 626 627 if (insn->immediate.got) 628 return 0; 629 630 if (!insn->displacement.got) { 631 ret = insn_get_displacement(insn); 632 if (ret) 633 return ret; 634 } 635 636 if (inat_has_moffset(insn->attr)) { 637 if (!__get_moffset(insn)) 638 goto err_out; 639 goto done; 640 } 641 642 if (!inat_has_immediate(insn->attr)) 643 /* no immediates */ 644 goto done; 645 646 switch (inat_immediate_size(insn->attr)) { 647 case INAT_IMM_BYTE: 648 insn->immediate.value = get_next(signed char, insn); 649 insn->immediate.nbytes = 1; 650 break; 651 case INAT_IMM_WORD: 652 insn->immediate.value = get_next(short, insn); 653 insn->immediate.nbytes = 2; 654 break; 655 case INAT_IMM_DWORD: 656 insn->immediate.value = get_next(int, insn); 657 insn->immediate.nbytes = 4; 658 break; 659 case INAT_IMM_QWORD: 660 insn->immediate1.value = get_next(int, insn); 661 insn->immediate1.nbytes = 4; 662 insn->immediate2.value = get_next(int, insn); 663 insn->immediate2.nbytes = 4; 664 break; 665 case INAT_IMM_PTR: 666 if (!__get_immptr(insn)) 667 goto err_out; 668 break; 669 case INAT_IMM_VWORD32: 670 if (!__get_immv32(insn)) 671 goto err_out; 672 break; 673 case INAT_IMM_VWORD: 674 if (!__get_immv(insn)) 675 goto err_out; 676 break; 677 default: 678 /* Here, insn must have an immediate, but failed */ 679 goto err_out; 680 } 681 if (inat_has_second_immediate(insn->attr)) { 682 insn->immediate2.value = get_next(signed char, insn); 683 insn->immediate2.nbytes = 1; 684 } 685done: 686 insn->immediate.got = 1; 687 return 0; 688 689err_out: 690 return -ENODATA; 691} 692 693/** 694 * insn_get_length() - Get the length of instruction 695 * @insn: &struct insn containing instruction 696 * 697 * If necessary, first collects the instruction up to and including the 698 * immediates bytes. 699 * 700 * Returns: 701 * - 0 on success 702 * - < 0 on error 703*/ 704int insn_get_length(struct insn *insn) 705{ 706 int ret; 707 708 if (insn->length) 709 return 0; 710 711 if (!insn->immediate.got) { 712 ret = insn_get_immediate(insn); 713 if (ret) 714 return ret; 715 } 716 717 insn->length = (unsigned char)((unsigned long)insn->next_byte 718 - (unsigned long)insn->kaddr); 719 720 return 0; 721} 722 723/** 724 * insn_decode() - Decode an x86 instruction 725 * @insn: &struct insn to be initialized 726 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 727 * @buf_len: length of the insn buffer at @kaddr 728 * @m: insn mode, see enum insn_mode 729 * 730 * Returns: 731 * 0: if decoding succeeded 732 * < 0: otherwise. 733 */ 734int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) 735{ 736 int ret; 737 738/* #define INSN_MODE_KERN -1 __ignore_sync_check__ mode is only valid in the kernel */ 739 740 if (m == INSN_MODE_KERN) 741 insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); 742 else 743 insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); 744 745 ret = insn_get_length(insn); 746 if (ret) 747 return ret; 748 749 if (insn_complete(insn)) 750 return 0; 751 752 return -EINVAL; 753} 754