/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  Copyright (C) 1994-2002 Russell King
 *  Copyright (c) 2003, 2020 ARM Limited
 *  All Rights Reserved
 */

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/page.h>

/*
 * Byte offsets of the low and high 32-bit words inside the 64-bit
 * __pv_offset variable. The two words trade places on big-endian
 * (__ARMEB__) builds.
 */
#ifdef __ARMEB__
#define LOW_OFFSET	0x4
#define HIGH_OFFSET	0x0
#else
#define LOW_OFFSET	0x0
#define HIGH_OFFSET	0x4
#endif

/*
 * __fixup_pv_table - patch the stub instructions with the delta between
 *                    PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
 *                    2 MiB aligned.
 *
 * Called from head.S, which expects the following registers to be preserved:
 *   r1 = machine no, r2 = atags or dtb,
 *   r8 = phys_offset, r9 = cpuid, r10 = procinfo
 *
 * What it does:
 *   - stores r8 >> PAGE_SHIFT in __pv_phys_pfn_offset
 *   - stores the delta (r8 - PAGE_OFFSET) in the low word of __pv_offset,
 *     and writes 0xffffffff to the high word when the subtraction borrows
 *     (i.e. the delta is negative); otherwise the high word is left alone
 *   - spins forever if the delta has any of its low 21 bits set
 *   - otherwise branches (does not call) to __fixup_a_pv_table with
 *     r4 = __pv_table_begin and r5 = __pv_table_end, so that routine's
 *     'ret lr' returns directly to our caller
 *
 * Registers written here before the branch: r0, r3, r4, r5, ip and the
 * flags (r3 is also handed to str_l as its third operand).
 */
	__HEAD
ENTRY(__fixup_pv_table)
	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
	str_l	r0, __pv_phys_pfn_offset, r3

	adr_l	r0, __pv_offset
	subs	r3, r8, #PAGE_OFFSET	@ PHYS_OFFSET - PAGE_OFFSET
	mvn	ip, #0			@ ip = 0xffffffff (flags from subs stay live)
	strcc	ip, [r0, #HIGH_OFFSET]	@ borrow -> negative delta: high bits = -1
	str	r3, [r0, #LOW_OFFSET]	@ save to __pv_offset low bits

	mov	r0, r3, lsr #21		@ r0 = delta with the low 21 bits shifted out
	teq	r3, r0, lsl #21 	@ must be 2 MiB aligned
	bne	0f

	adr_l	r4, __pv_table_begin
	adr_l	r5, __pv_table_end
	b	__fixup_a_pv_table	@ tail branch: lr still points at our caller

0:	mov	r0, r0			@ deadloop on error
	b	0b
ENDPROC(__fixup_pv_table)

/*
 * __fixup_a_pv_table - apply the offset stored in __pv_offset to every
 *                      instruction referenced by a pv table.
 *
 * In:
 *   r4 = address of the first table entry
 *   r5 = address one past the last table entry
 *   lr = return address
 *
 * Each table entry is a 32-bit word; adding the entry's own address to the
 * value it holds yields the address of the instruction to patch.
 *
 * Clobbers r0, r3, r6, r7, ip and the flags. r4 is advanced by 4 per entry
 * until it is no longer below r5 (unsigned compare).
 */
	.text
__fixup_a_pv_table:
	adr_l	r6, __pv_offset
	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
	cmn	r0, #1			@ Z set if high word == 0xffffffff (negative
					@ offset); consumed by the 'moveq' below
#ifdef CONFIG_THUMB2_KERNEL
	@
	@ The Thumb-2 versions of the patchable sequences are
	@
	@ phys-to-virt:			movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				sub	<VA>, <PA>, <reg>
	@
	@ virt-to-phys (non-LPAE):	movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				add	<PA>, <VA>, <reg>
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:21>
	@				lsl	<reg>, #21
	@				adds	<PAlo>, <VA>, <reg>
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are MOVW
	@ instructions, where we need to patch in the offset into the
	@ second halfword of the opcode (the 16-bit immediate is encoded
	@ as imm4:i:imm3:imm8)
	@
	@       15       11 10  9           4 3    0  15  14  12 11 8 7    0
	@      +-----------+---+-------------+------++---+------+----+------+
	@ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
	@      +-----------+---+-------------+------++---+------+----+------+
	@
	@ In the LPAE case, we also need to patch in the high word of the
	@ offset into the immediate field of the MOV instruction, or patch it
	@ to a MVN instruction if the offset is negative. In this case, we
	@ need to inspect the first halfword of the opcode, to check whether
	@ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
	@ needed. The encoding of the immediate is rather complex for values
	@ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
	@ order bits, which can be patched into imm8 directly (and i:imm3
	@ cleared)
	@
	@      15       11 10  9        5         0  15  14  12 11 8 7    0
	@     +-----------+---+---------------------++---+------+----+------+
	@ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@     +-----------+---+---------------------++---+------+----+------+
	@
	@ Register roles inside the loop below:
	@   r0 = value for the MOV/MVN patch (high word, or 0x200000 if negative)
	@   r6 = imm3:imm8 bits for the second halfword of each MOVW
	@   r7 = address of the instruction being patched
	@
	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
	lsrs	r3, r6, #29		@ isolate top 3 bits of displacement
	ubfx	r6, r6, #21, #8		@ put bits 28:21 into the MOVW imm8 field
	bfi	r6, r3, #12, #3		@ put bits 31:29 into the MOVW imm3 field
	b	.Lnext
.Lloop:	add	r7, r4			@ r7 = entry value + entry address
	adds	r4, #4			@ clears Z flag
#ifdef CONFIG_ARM_LPAE
	ldrh	ip, [r7]		@ first halfword of the opcode
ARM_BE8(rev16	ip, ip)
	tst	ip, #0x200		@ MOVW has bit 9 set, MVN has it clear
	bne	0f			@ skip to MOVW handling (Z flag is clear)
	bic	ip, #0x20		@ clear bit 5 (MVN -> MOV)
	orr	ip, ip, r0, lsr #16	@ MOV -> MVN if offset < 0
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7]
	@ Z flag is set
0:
#endif
	ldrh	ip, [r7, #2]		@ second halfword of the opcode
ARM_BE8(rev16	ip, ip)
	and	ip, #0xf00		@ clear everything except Rd field
	orreq	ip, r0			@ Z flag set -> MOV/MVN -> patch in high bits
	orrne	ip, r6			@ Z flag clear -> MOVW -> patch in low bits
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7, #2]
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
#define PV_BIT24	0x00000001
#define PV_IMM8_MASK	0xff000000
#define PV_IMMR_MSB	0x00080000
#else
#define PV_BIT24	0x01000000
#define PV_IMM8_MASK	0x000000ff
#define PV_IMMR_MSB	0x00000800
#endif

	@
	@ The ARM versions of the patchable sequences are
	@
	@ phys-to-virt:			sub	<VA>, <PA>, #offset<31:24>, lsl #24
	@				sub	<VA>, <PA>, #offset<23:16>, lsl #16
	@
	@ virt-to-phys (non-LPAE):	add	<PA>, <VA>, #offset<31:24>, lsl #24
	@				add	<PA>, <VA>, #offset<23:16>, lsl #16
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:20>
	@				adds	<PAlo>, <VA>, <reg>, lsl #20
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are ADD or SUB
	@ instructions, where we need to patch in the offset into the
	@ immediate field of the opcode, which is emitted with the correct
	@ rotation value. (The effective value of the immediate is imm12<7:0>
	@ rotated right by [2 * imm12<11:8>] bits)
	@
	@      31   28 27      23 22  20 19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@  ADD | cond | 0 0 1 0 1 0 0 0 |  Rn  |  Rd  | imm12 |
	@  SUB | cond | 0 0 1 0 0 1 0 0 |  Rn  |  Rd  | imm12 |
	@  MOV | cond | 0 0 1 1 1 0 1 0 |  Rn  |  Rd  | imm12 |
	@  MVN | cond | 0 0 1 1 1 1 1 0 |  Rn  |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	@ In the LPAE case, we use a MOVW instruction to carry the low offset
	@ word, and patch in the high word of the offset into the immediate
	@ field of the subsequent MOV instruction, or patch it to a MVN
	@ instruction if the offset is negative. We can distinguish MOVW
	@ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
	@ distinguished from MOV/MVN (all using the encodings above) using
	@ bit 24.
	@
	@      31   28 27      23 22  20 19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	@ Register roles inside the loop below:
	@   r0 = value for the MOV/MVN patch (high word, or 0x400000 if negative)
	@   r3 = offset bits 23-20, held in bits 7-4
	@   r6 = offset bits 31-24, held in bits 7-0
	@   r7 = table entry value; r7 + r4 addresses the instruction
	@
	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
	mov	r3, r6, lsr #16		@ put offset bits 31-16 into r3
	mov	r6, r6, lsr #24		@ put offset bits 31-24 into r6
	and	r3, r3, #0xf0		@ only keep offset bits 23-20 in r3
	b	.Lnext
.Lloop:	ldr	ip, [r7, r4]		@ fetch opcode at entry value + entry address
#ifdef CONFIG_ARM_LPAE
	tst	ip, #PV_BIT24		@ ADD/SUB have bit 24 clear
	beq	1f
ARM_BE8(rev	ip, ip)
	tst	ip, #0xc00000		@ MOVW has bits 23:22 clear
	bic	ip, ip, #0x400000	@ clear bit 22
	bfc	ip, #0, #12		@ clear imm12 field of MOV[W] instruction
	orreq	ip, ip, r6, lsl #4	@ MOVW -> mask in offset bits 31-24
	orreq	ip, ip, r3, lsr #4	@ MOVW -> mask in offset bits 23-20
	orrne	ip, ip, r0		@ MOV  -> mask in offset bits 7-0 (or bit 22)
ARM_BE8(rev	ip, ip)
	b	2f
1:
#endif
	tst	ip, #PV_IMMR_MSB		@ rotation value >= 16 ?
	bic	ip, ip, #PV_IMM8_MASK
	orreq	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
	orrne	ip, ip, r3 ARM_BE8(, lsl #24)	@ mask in offset bits 23-20
2:
	str	ip, [r7, r4]		@ write the patched opcode back
	add	r4, r4, #4		@ advance to the next table entry
#endif

.Lnext:
	cmp	r4, r5			@ more entries left (r4 < r5, unsigned)?
	ldrcc	r7, [r4]		@ use branch for delay slot
	bcc	.Lloop
	ret	lr
ENDPROC(__fixup_a_pv_table)

/*
 * fixup_pv_table - patch the instructions listed in an arbitrary pv table
 *
 *   r0 = address of the first table entry
 *   r1 = size of the table in bytes (added to r0 to form the end address)
 *
 * Saves r4-r7 and lr around the call to __fixup_a_pv_table, which uses
 * r4/r5 as its table bounds and overwrites r6/r7; r0, r3 and ip are also
 * overwritten by that routine and are not restored.
 * NOTE(review): presumably the entry point used from C code - confirm
 * against the callers.
 */
ENTRY(fixup_pv_table)
	stmfd	sp!, {r4 - r7, lr}
	mov	r4, r0			@ r0 = table start
	add	r5, r0, r1		@ r1 = table size
	bl	__fixup_a_pv_table
	ldmfd	sp!, {r4 - r7, pc}	@ restore r4-r7 and return
ENDPROC(fixup_pv_table)

2268c2ecf20Sopenharmony_ci	.data
2278c2ecf20Sopenharmony_ci	.align	2
2288c2ecf20Sopenharmony_ci	.globl	__pv_phys_pfn_offset
2298c2ecf20Sopenharmony_ci	.type	__pv_phys_pfn_offset, %object
2308c2ecf20Sopenharmony_ci__pv_phys_pfn_offset:
2318c2ecf20Sopenharmony_ci	.word	0
2328c2ecf20Sopenharmony_ci	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	.globl	__pv_offset
2358c2ecf20Sopenharmony_ci	.type	__pv_offset, %object
2368c2ecf20Sopenharmony_ci__pv_offset:
2378c2ecf20Sopenharmony_ci	.quad	0
2388c2ecf20Sopenharmony_ci	.size	__pv_offset, . -__pv_offset
239