xref: /kernel/linux/linux-5.10/arch/x86/lib/insn.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * x86 instruction analysis
4 *
5 * Copyright (C) IBM Corporation, 2002, 2004, 2009
6 */
7
8#ifdef __KERNEL__
9#include <linux/string.h>
10#else
11#include <string.h>
12#endif
13#include <asm/inat.h> /*__ignore_sync_check__ */
14#include <asm/insn.h> /* __ignore_sync_check__ */
15
16#include <linux/errno.h>
17#include <linux/kconfig.h>
18
19#include <asm/emulate_prefix.h> /* __ignore_sync_check__ */
20
/*
 * Bounds-checked fetch helpers for the decoder.
 *
 * All of them operate on (insn)->next_byte and (insn)->end_kaddr.  The
 * checked variants (get_next, peek_nbyte_next, peek_next) jump to a local
 * "err_out" label when the requested bytes would run past end_kaddr, so
 * every function using them must provide that label.
 *
 * Values are copied out with memcpy() instead of dereferencing a cast
 * pointer: immediates and displacements sit at arbitrary byte offsets in
 * the instruction stream, so a typed load would be a misaligned access.
 */

/* Verify that sizeof(t) bytes starting @n bytes ahead are inside the buffer */
#define validate_next(t, insn, n)	\
	((insn)->next_byte + sizeof(t) + (n) <= (insn)->end_kaddr)

/* Unchecked: read a @t at next_byte and advance next_byte past it */
#define __get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); \
	   (insn)->next_byte += sizeof(t); r; })

/* Unchecked: read a @t located @n bytes ahead, without advancing */
#define __peek_nbyte_next(t, insn, n)	\
	({ t r; memcpy(&r, (insn)->next_byte + (n), sizeof(t)); r; })

/* Checked read-and-advance; jumps to err_out if out of bytes */
#define get_next(t, insn)	\
	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })

/* Checked look-ahead at offset @n; jumps to err_out if out of bytes */
#define peek_nbyte_next(t, insn, n)	\
	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })

/* Checked look at the very next value without consuming it */
#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
38
39/**
40 * insn_init() - initialize struct insn
41 * @insn:	&struct insn to be initialized
42 * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
43 * @x86_64:	!0 for 64-bit kernel or 64-bit app
44 */
45void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
46{
47	/*
48	 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
49	 * even if the input buffer is long enough to hold them.
50	 */
51	if (buf_len > MAX_INSN_SIZE)
52		buf_len = MAX_INSN_SIZE;
53
54	memset(insn, 0, sizeof(*insn));
55	insn->kaddr = kaddr;
56	insn->end_kaddr = kaddr + buf_len;
57	insn->next_byte = kaddr;
58	insn->x86_64 = x86_64 ? 1 : 0;
59	insn->opnd_bytes = 4;
60	if (x86_64)
61		insn->addr_bytes = 8;
62	else
63		insn->addr_bytes = 4;
64}
65
66static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
67static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
68
69static int __insn_get_emulate_prefix(struct insn *insn,
70				     const insn_byte_t *prefix, size_t len)
71{
72	size_t i;
73
74	for (i = 0; i < len; i++) {
75		if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
76			goto err_out;
77	}
78
79	insn->emulate_prefix_size = len;
80	insn->next_byte += len;
81
82	return 1;
83
84err_out:
85	return 0;
86}
87
88static void insn_get_emulate_prefix(struct insn *insn)
89{
90	if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
91		return;
92
93	__insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
94}
95
96/**
97 * insn_get_prefixes - scan x86 instruction prefix bytes
98 * @insn:	&struct insn containing instruction
99 *
100 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
101 * to point to the (first) opcode.  No effect if @insn->prefixes.got
102 * is already set.
103 *
104 * * Returns:
105 * 0:  on success
106 * < 0: on error
107 */
108int insn_get_prefixes(struct insn *insn)
109{
110	struct insn_field *prefixes = &insn->prefixes;
111	insn_attr_t attr;
112	insn_byte_t b, lb;
113	int i, nb;
114
115	if (prefixes->got)
116		return 0;
117
118	insn_get_emulate_prefix(insn);
119
120	nb = 0;
121	lb = 0;
122	b = peek_next(insn_byte_t, insn);
123	attr = inat_get_opcode_attribute(b);
124	while (inat_is_legacy_prefix(attr)) {
125		/* Skip if same prefix */
126		for (i = 0; i < nb; i++)
127			if (prefixes->bytes[i] == b)
128				goto found;
129		if (nb == 4)
130			/* Invalid instruction */
131			break;
132		prefixes->bytes[nb++] = b;
133		if (inat_is_address_size_prefix(attr)) {
134			/* address size switches 2/4 or 4/8 */
135			if (insn->x86_64)
136				insn->addr_bytes ^= 12;
137			else
138				insn->addr_bytes ^= 6;
139		} else if (inat_is_operand_size_prefix(attr)) {
140			/* oprand size switches 2/4 */
141			insn->opnd_bytes ^= 6;
142		}
143found:
144		prefixes->nbytes++;
145		insn->next_byte++;
146		lb = b;
147		b = peek_next(insn_byte_t, insn);
148		attr = inat_get_opcode_attribute(b);
149	}
150	/* Set the last prefix */
151	if (lb && lb != insn->prefixes.bytes[3]) {
152		if (unlikely(insn->prefixes.bytes[3])) {
153			/* Swap the last prefix */
154			b = insn->prefixes.bytes[3];
155			for (i = 0; i < nb; i++)
156				if (prefixes->bytes[i] == lb)
157					prefixes->bytes[i] = b;
158		}
159		insn->prefixes.bytes[3] = lb;
160	}
161
162	/* Decode REX prefix */
163	if (insn->x86_64) {
164		b = peek_next(insn_byte_t, insn);
165		attr = inat_get_opcode_attribute(b);
166		if (inat_is_rex_prefix(attr)) {
167			insn->rex_prefix.value = b;
168			insn->rex_prefix.nbytes = 1;
169			insn->next_byte++;
170			if (X86_REX_W(b))
171				/* REX.W overrides opnd_size */
172				insn->opnd_bytes = 8;
173		}
174	}
175	insn->rex_prefix.got = 1;
176
177	/* Decode VEX prefix */
178	b = peek_next(insn_byte_t, insn);
179	attr = inat_get_opcode_attribute(b);
180	if (inat_is_vex_prefix(attr)) {
181		insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
182		if (!insn->x86_64) {
183			/*
184			 * In 32-bits mode, if the [7:6] bits (mod bits of
185			 * ModRM) on the second byte are not 11b, it is
186			 * LDS or LES or BOUND.
187			 */
188			if (X86_MODRM_MOD(b2) != 3)
189				goto vex_end;
190		}
191		insn->vex_prefix.bytes[0] = b;
192		insn->vex_prefix.bytes[1] = b2;
193		if (inat_is_evex_prefix(attr)) {
194			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
195			insn->vex_prefix.bytes[2] = b2;
196			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
197			insn->vex_prefix.bytes[3] = b2;
198			insn->vex_prefix.nbytes = 4;
199			insn->next_byte += 4;
200			if (insn->x86_64 && X86_VEX_W(b2))
201				/* VEX.W overrides opnd_size */
202				insn->opnd_bytes = 8;
203		} else if (inat_is_vex3_prefix(attr)) {
204			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
205			insn->vex_prefix.bytes[2] = b2;
206			insn->vex_prefix.nbytes = 3;
207			insn->next_byte += 3;
208			if (insn->x86_64 && X86_VEX_W(b2))
209				/* VEX.W overrides opnd_size */
210				insn->opnd_bytes = 8;
211		} else {
212			/*
213			 * For VEX2, fake VEX3-like byte#2.
214			 * Makes it easier to decode vex.W, vex.vvvv,
215			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
216			 */
217			insn->vex_prefix.bytes[2] = b2 & 0x7f;
218			insn->vex_prefix.nbytes = 2;
219			insn->next_byte += 2;
220		}
221	}
222vex_end:
223	insn->vex_prefix.got = 1;
224
225	prefixes->got = 1;
226
227	return 0;
228
229err_out:
230	return -ENODATA;
231}
232
233/**
234 * insn_get_opcode - collect opcode(s)
235 * @insn:	&struct insn containing instruction
236 *
237 * Populates @insn->opcode, updates @insn->next_byte to point past the
238 * opcode byte(s), and set @insn->attr (except for groups).
239 * If necessary, first collects any preceding (prefix) bytes.
240 * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
241 * is already 1.
242 *
243 * Returns:
244 * 0:  on success
245 * < 0: on error
246 */
247int insn_get_opcode(struct insn *insn)
248{
249	struct insn_field *opcode = &insn->opcode;
250	int pfx_id, ret;
251	insn_byte_t op;
252
253	if (opcode->got)
254		return 0;
255
256	if (!insn->prefixes.got) {
257		ret = insn_get_prefixes(insn);
258		if (ret)
259			return ret;
260	}
261
262	/* Get first opcode */
263	op = get_next(insn_byte_t, insn);
264	opcode->bytes[0] = op;
265	opcode->nbytes = 1;
266
267	/* Check if there is VEX prefix or not */
268	if (insn_is_avx(insn)) {
269		insn_byte_t m, p;
270		m = insn_vex_m_bits(insn);
271		p = insn_vex_p_bits(insn);
272		insn->attr = inat_get_avx_attribute(op, m, p);
273		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
274		    (!inat_accept_vex(insn->attr) &&
275		     !inat_is_group(insn->attr))) {
276			/* This instruction is bad */
277			insn->attr = 0;
278			return -EINVAL;
279		}
280		/* VEX has only 1 byte for opcode */
281		goto end;
282	}
283
284	insn->attr = inat_get_opcode_attribute(op);
285	while (inat_is_escape(insn->attr)) {
286		/* Get escaped opcode */
287		op = get_next(insn_byte_t, insn);
288		opcode->bytes[opcode->nbytes++] = op;
289		pfx_id = insn_last_prefix_id(insn);
290		insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
291	}
292
293	if (inat_must_vex(insn->attr)) {
294		/* This instruction is bad */
295		insn->attr = 0;
296		return -EINVAL;
297	}
298end:
299	opcode->got = 1;
300	return 0;
301
302err_out:
303	return -ENODATA;
304}
305
306/**
307 * insn_get_modrm - collect ModRM byte, if any
308 * @insn:	&struct insn containing instruction
309 *
310 * Populates @insn->modrm and updates @insn->next_byte to point past the
311 * ModRM byte, if any.  If necessary, first collects the preceding bytes
312 * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
313 *
314 * Returns:
315 * 0:  on success
316 * < 0: on error
317 */
318int insn_get_modrm(struct insn *insn)
319{
320	struct insn_field *modrm = &insn->modrm;
321	insn_byte_t pfx_id, mod;
322	int ret;
323
324	if (modrm->got)
325		return 0;
326
327	if (!insn->opcode.got) {
328		ret = insn_get_opcode(insn);
329		if (ret)
330			return ret;
331	}
332
333	if (inat_has_modrm(insn->attr)) {
334		mod = get_next(insn_byte_t, insn);
335		modrm->value = mod;
336		modrm->nbytes = 1;
337		if (inat_is_group(insn->attr)) {
338			pfx_id = insn_last_prefix_id(insn);
339			insn->attr = inat_get_group_attribute(mod, pfx_id,
340							      insn->attr);
341			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
342				/* Bad insn */
343				insn->attr = 0;
344				return -EINVAL;
345			}
346		}
347	}
348
349	if (insn->x86_64 && inat_is_force64(insn->attr))
350		insn->opnd_bytes = 8;
351
352	modrm->got = 1;
353	return 0;
354
355err_out:
356	return -ENODATA;
357}
358
359
360/**
361 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
362 * @insn:	&struct insn containing instruction
363 *
364 * If necessary, first collects the instruction up to and including the
365 * ModRM byte.  No effect if @insn->x86_64 is 0.
366 */
367int insn_rip_relative(struct insn *insn)
368{
369	struct insn_field *modrm = &insn->modrm;
370	int ret;
371
372	if (!insn->x86_64)
373		return 0;
374
375	if (!modrm->got) {
376		ret = insn_get_modrm(insn);
377		if (ret)
378			return 0;
379	}
380	/*
381	 * For rip-relative instructions, the mod field (top 2 bits)
382	 * is zero and the r/m field (bottom 3 bits) is 0x5.
383	 */
384	return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
385}
386
387/**
388 * insn_get_sib() - Get the SIB byte of instruction
389 * @insn:	&struct insn containing instruction
390 *
391 * If necessary, first collects the instruction up to and including the
392 * ModRM byte.
393 *
394 * Returns:
395 * 0: if decoding succeeded
396 * < 0: otherwise.
397 */
398int insn_get_sib(struct insn *insn)
399{
400	insn_byte_t modrm;
401	int ret;
402
403	if (insn->sib.got)
404		return 0;
405
406	if (!insn->modrm.got) {
407		ret = insn_get_modrm(insn);
408		if (ret)
409			return ret;
410	}
411
412	if (insn->modrm.nbytes) {
413		modrm = (insn_byte_t)insn->modrm.value;
414		if (insn->addr_bytes != 2 &&
415		    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
416			insn->sib.value = get_next(insn_byte_t, insn);
417			insn->sib.nbytes = 1;
418		}
419	}
420	insn->sib.got = 1;
421
422	return 0;
423
424err_out:
425	return -ENODATA;
426}
427
428
429/**
430 * insn_get_displacement() - Get the displacement of instruction
431 * @insn:	&struct insn containing instruction
432 *
433 * If necessary, first collects the instruction up to and including the
434 * SIB byte.
435 * Displacement value is sign-expanded.
436 *
437 * * Returns:
438 * 0: if decoding succeeded
439 * < 0: otherwise.
440 */
441int insn_get_displacement(struct insn *insn)
442{
443	insn_byte_t mod, rm, base;
444	int ret;
445
446	if (insn->displacement.got)
447		return 0;
448
449	if (!insn->sib.got) {
450		ret = insn_get_sib(insn);
451		if (ret)
452			return ret;
453	}
454
455	if (insn->modrm.nbytes) {
456		/*
457		 * Interpreting the modrm byte:
458		 * mod = 00 - no displacement fields (exceptions below)
459		 * mod = 01 - 1-byte displacement field
460		 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
461		 * 	address size = 2 (0x67 prefix in 32-bit mode)
462		 * mod = 11 - no memory operand
463		 *
464		 * If address size = 2...
465		 * mod = 00, r/m = 110 - displacement field is 2 bytes
466		 *
467		 * If address size != 2...
468		 * mod != 11, r/m = 100 - SIB byte exists
469		 * mod = 00, SIB base = 101 - displacement field is 4 bytes
470		 * mod = 00, r/m = 101 - rip-relative addressing, displacement
471		 * 	field is 4 bytes
472		 */
473		mod = X86_MODRM_MOD(insn->modrm.value);
474		rm = X86_MODRM_RM(insn->modrm.value);
475		base = X86_SIB_BASE(insn->sib.value);
476		if (mod == 3)
477			goto out;
478		if (mod == 1) {
479			insn->displacement.value = get_next(signed char, insn);
480			insn->displacement.nbytes = 1;
481		} else if (insn->addr_bytes == 2) {
482			if ((mod == 0 && rm == 6) || mod == 2) {
483				insn->displacement.value =
484					 get_next(short, insn);
485				insn->displacement.nbytes = 2;
486			}
487		} else {
488			if ((mod == 0 && rm == 5) || mod == 2 ||
489			    (mod == 0 && base == 5)) {
490				insn->displacement.value = get_next(int, insn);
491				insn->displacement.nbytes = 4;
492			}
493		}
494	}
495out:
496	insn->displacement.got = 1;
497	return 0;
498
499err_out:
500	return -ENODATA;
501}
502
503/* Decode moffset16/32/64. Return 0 if failed */
504static int __get_moffset(struct insn *insn)
505{
506	switch (insn->addr_bytes) {
507	case 2:
508		insn->moffset1.value = get_next(short, insn);
509		insn->moffset1.nbytes = 2;
510		break;
511	case 4:
512		insn->moffset1.value = get_next(int, insn);
513		insn->moffset1.nbytes = 4;
514		break;
515	case 8:
516		insn->moffset1.value = get_next(int, insn);
517		insn->moffset1.nbytes = 4;
518		insn->moffset2.value = get_next(int, insn);
519		insn->moffset2.nbytes = 4;
520		break;
521	default:	/* opnd_bytes must be modified manually */
522		goto err_out;
523	}
524	insn->moffset1.got = insn->moffset2.got = 1;
525
526	return 1;
527
528err_out:
529	return 0;
530}
531
532/* Decode imm v32(Iz). Return 0 if failed */
533static int __get_immv32(struct insn *insn)
534{
535	switch (insn->opnd_bytes) {
536	case 2:
537		insn->immediate.value = get_next(short, insn);
538		insn->immediate.nbytes = 2;
539		break;
540	case 4:
541	case 8:
542		insn->immediate.value = get_next(int, insn);
543		insn->immediate.nbytes = 4;
544		break;
545	default:	/* opnd_bytes must be modified manually */
546		goto err_out;
547	}
548
549	return 1;
550
551err_out:
552	return 0;
553}
554
555/* Decode imm v64(Iv/Ov), Return 0 if failed */
556static int __get_immv(struct insn *insn)
557{
558	switch (insn->opnd_bytes) {
559	case 2:
560		insn->immediate1.value = get_next(short, insn);
561		insn->immediate1.nbytes = 2;
562		break;
563	case 4:
564		insn->immediate1.value = get_next(int, insn);
565		insn->immediate1.nbytes = 4;
566		break;
567	case 8:
568		insn->immediate1.value = get_next(int, insn);
569		insn->immediate1.nbytes = 4;
570		insn->immediate2.value = get_next(int, insn);
571		insn->immediate2.nbytes = 4;
572		break;
573	default:	/* opnd_bytes must be modified manually */
574		goto err_out;
575	}
576	insn->immediate1.got = insn->immediate2.got = 1;
577
578	return 1;
579err_out:
580	return 0;
581}
582
583/* Decode ptr16:16/32(Ap) */
584static int __get_immptr(struct insn *insn)
585{
586	switch (insn->opnd_bytes) {
587	case 2:
588		insn->immediate1.value = get_next(short, insn);
589		insn->immediate1.nbytes = 2;
590		break;
591	case 4:
592		insn->immediate1.value = get_next(int, insn);
593		insn->immediate1.nbytes = 4;
594		break;
595	case 8:
596		/* ptr16:64 is not exist (no segment) */
597		return 0;
598	default:	/* opnd_bytes must be modified manually */
599		goto err_out;
600	}
601	insn->immediate2.value = get_next(unsigned short, insn);
602	insn->immediate2.nbytes = 2;
603	insn->immediate1.got = insn->immediate2.got = 1;
604
605	return 1;
606err_out:
607	return 0;
608}
609
610/**
611 * insn_get_immediate() - Get the immediate in an instruction
612 * @insn:	&struct insn containing instruction
613 *
614 * If necessary, first collects the instruction up to and including the
615 * displacement bytes.
616 * Basically, most of immediates are sign-expanded. Unsigned-value can be
617 * computed by bit masking with ((1 << (nbytes * 8)) - 1)
618 *
619 * Returns:
620 * 0:  on success
621 * < 0: on error
622 */
623int insn_get_immediate(struct insn *insn)
624{
625	int ret;
626
627	if (insn->immediate.got)
628		return 0;
629
630	if (!insn->displacement.got) {
631		ret = insn_get_displacement(insn);
632		if (ret)
633			return ret;
634	}
635
636	if (inat_has_moffset(insn->attr)) {
637		if (!__get_moffset(insn))
638			goto err_out;
639		goto done;
640	}
641
642	if (!inat_has_immediate(insn->attr))
643		/* no immediates */
644		goto done;
645
646	switch (inat_immediate_size(insn->attr)) {
647	case INAT_IMM_BYTE:
648		insn->immediate.value = get_next(signed char, insn);
649		insn->immediate.nbytes = 1;
650		break;
651	case INAT_IMM_WORD:
652		insn->immediate.value = get_next(short, insn);
653		insn->immediate.nbytes = 2;
654		break;
655	case INAT_IMM_DWORD:
656		insn->immediate.value = get_next(int, insn);
657		insn->immediate.nbytes = 4;
658		break;
659	case INAT_IMM_QWORD:
660		insn->immediate1.value = get_next(int, insn);
661		insn->immediate1.nbytes = 4;
662		insn->immediate2.value = get_next(int, insn);
663		insn->immediate2.nbytes = 4;
664		break;
665	case INAT_IMM_PTR:
666		if (!__get_immptr(insn))
667			goto err_out;
668		break;
669	case INAT_IMM_VWORD32:
670		if (!__get_immv32(insn))
671			goto err_out;
672		break;
673	case INAT_IMM_VWORD:
674		if (!__get_immv(insn))
675			goto err_out;
676		break;
677	default:
678		/* Here, insn must have an immediate, but failed */
679		goto err_out;
680	}
681	if (inat_has_second_immediate(insn->attr)) {
682		insn->immediate2.value = get_next(signed char, insn);
683		insn->immediate2.nbytes = 1;
684	}
685done:
686	insn->immediate.got = 1;
687	return 0;
688
689err_out:
690	return -ENODATA;
691}
692
693/**
694 * insn_get_length() - Get the length of instruction
695 * @insn:	&struct insn containing instruction
696 *
697 * If necessary, first collects the instruction up to and including the
698 * immediates bytes.
699 *
700 * Returns:
701 *  - 0 on success
702 *  - < 0 on error
703*/
704int insn_get_length(struct insn *insn)
705{
706	int ret;
707
708	if (insn->length)
709		return 0;
710
711	if (!insn->immediate.got) {
712		ret = insn_get_immediate(insn);
713		if (ret)
714			return ret;
715	}
716
717	insn->length = (unsigned char)((unsigned long)insn->next_byte
718				     - (unsigned long)insn->kaddr);
719
720	return 0;
721}
722
723/**
724 * insn_decode() - Decode an x86 instruction
725 * @insn:	&struct insn to be initialized
726 * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
727 * @buf_len:	length of the insn buffer at @kaddr
728 * @m:		insn mode, see enum insn_mode
729 *
730 * Returns:
731 * 0: if decoding succeeded
732 * < 0: otherwise.
733 */
734int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
735{
736	int ret;
737
738/* #define INSN_MODE_KERN	-1 __ignore_sync_check__ mode is only valid in the kernel */
739
740	if (m == INSN_MODE_KERN)
741		insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
742	else
743		insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
744
745	ret = insn_get_length(insn);
746	if (ret)
747		return ret;
748
749	if (insn_complete(insn))
750		return 0;
751
752	return -EINVAL;
753}
754