/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */
88c2ecf20Sopenharmony_ci
/*
 * fpsimd_save - store the FP/SIMD register state to memory.
 *
 * \state: X register holding the base address of the save area.
 * \tmpnr: number (not name) of a scratch general-purpose register; it is
 *	   used as both x\tmpnr and w\tmpnr.
 *
 * Layout written, relative to the value of \state on entry:
 *	+0 .. +511	q0-q31, 16 bytes each
 *	+512		FPSR (32 bits)
 *	+516		FPCR (32 bits)
 *
 * Clobbers \state (the final stp uses pre-index writeback, leaving it at
 * its entry value + 16 * 30) and x\tmpnr.
 */
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	/* Pre-index writeback: the base register advances by 16 * 30 here. */
	stp	q30, q31, [\state, #16 * 30]!
	/* The offsets below are relative to the advanced base register. */
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm
318c2ecf20Sopenharmony_ci
/*
 * fpsimd_restore_fpcr - write FPCR only if the value differs.
 *
 * \state: X register holding the FPCR value to install.
 * \tmp:   scratch X register; receives the current FPCR value.
 *
 * Clobbers \tmp and the condition flags (cmp).
 */
.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm
438c2ecf20Sopenharmony_ci
/*
 * fpsimd_restore - load the FP/SIMD register state from memory.
 *
 * \state: X register holding the base address of the save area.
 * \tmpnr: number (not name) of a scratch general-purpose register; it is
 *	   used as both x\tmpnr and w\tmpnr.
 *
 * Reads the layout that fpsimd_save writes: q0-q31 at +0 .. +511, FPSR at
 * +512 and FPCR at +516, relative to the value of \state on entry.
 *
 * Clobbers \state: the final ldp advances it by 16 * 30 (pre-index
 * writeback) and it is then handed to fpsimd_restore_fpcr as the scratch
 * register. Also clobbers x\tmpnr and the condition flags.
 */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	/* Pre-index writeback: the base register advances by 16 * 30 here. */
	ldp	q30, q31, [\state, #16 * 30]!
	/* The offsets below are relative to the advanced base register. */
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	/* The loaded FPCR value is the "state" operand; the base is scratch. */
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
678c2ecf20Sopenharmony_ci
/* Sanity-check macros to help avoid encoding garbage instructions */
698c2ecf20Sopenharmony_ci
/*
 * _check_general_reg - assemble-time check of a general register number.
 *
 * \nr: expression that must evaluate to a value in [0, 30]; otherwise an
 *	assembler error is raised. Emits no code.
 */
.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm
758c2ecf20Sopenharmony_ci
/*
 * _sve_check_zreg - assemble-time check of an SVE vector register number.
 *
 * \znr: expression that must evaluate to a value in [0, 31]; otherwise an
 *	 assembler error is raised. Emits no code.
 */
.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm
818c2ecf20Sopenharmony_ci
/*
 * _sve_check_preg - assemble-time check of an SVE predicate register number.
 *
 * \pnr: expression that must evaluate to a value in [0, 15]; otherwise an
 *	 assembler error is raised. Emits no code.
 */
.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm
878c2ecf20Sopenharmony_ci
/*
 * _check_num - assemble-time range check of a number.
 *
 * \n:   expression to check.
 * \min: lowest accepted value (inclusive).
 * \max: highest accepted value (inclusive).
 *
 * Raises an assembler error when \n lies outside [\min, \max]. Emits no code.
 */
.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm
938c2ecf20Sopenharmony_ci
/* SVE instruction encodings for non-SVE-capable assemblers */
958c2ecf20Sopenharmony_ci
/*
 * STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \nz:     Z register number, 0-31.
 * \nxbase: base X register number, 0-30.
 * \offset: signed offset in [-0x100, 0xff]; defaults to 0.
 *
 * Field placement: \nz at bits [4:0], \nxbase at bits [9:5]; the 9-bit
 * two's-complement offset is split, its low 3 bits going to [12:10] and its
 * high 6 bits to [21:16].
 */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
1078c2ecf20Sopenharmony_ci
/*
 * LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \nz:     Z register number, 0-31.
 * \nxbase: base X register number, 0-30.
 * \offset: signed offset in [-0x100, 0xff]; defaults to 0.
 *
 * Field placement: \nz at bits [4:0], \nxbase at bits [9:5]; the 9-bit
 * two's-complement offset is split, its low 3 bits going to [12:10] and its
 * high 6 bits to [21:16].
 */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
1198c2ecf20Sopenharmony_ci
/*
 * STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \np:     P register number, 0-15.
 * \nxbase: base X register number, 0-30.
 * \offset: signed offset in [-0x100, 0xff]; defaults to 0.
 *
 * Field placement: \np at bits [3:0], \nxbase at bits [9:5]; the 9-bit
 * two's-complement offset is split, its low 3 bits going to [12:10] and its
 * high 6 bits to [21:16].
 */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
1318c2ecf20Sopenharmony_ci
/*
 * LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \np:     P register number, 0-15.
 * \nxbase: base X register number, 0-30.
 * \offset: signed offset in [-0x100, 0xff]; defaults to 0.
 *
 * Field placement: \np at bits [3:0], \nxbase at bits [9:5]; the 9-bit
 * two's-complement offset is split, its low 3 bits going to [12:10] and its
 * high 6 bits to [21:16].
 */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
1438c2ecf20Sopenharmony_ci
/*
 * RDVL X\nx, #\imm
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \nx:  destination X register number, 0-30.
 * \imm: signed immediate in [-0x20, 0x1f].
 *
 * Field placement: \nx at bits [4:0], the 6-bit two's-complement immediate
 * at bits [10:5].
 */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm
1528c2ecf20Sopenharmony_ci
/*
 * RDFFR (unpredicated): RDFFR P\np.B
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \np: destination P register number, 0-15, placed at bits [3:0].
 */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm
1598c2ecf20Sopenharmony_ci
/*
 * WRFFR P\np.B
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \np: source P register number, 0-15, placed at bits [8:5].
 */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm
1668c2ecf20Sopenharmony_ci
/*
 * PFALSE P\np.B
 *
 * Emitted as a raw instruction word so that the assembler does not need to
 * understand SVE mnemonics.
 *
 * \np: destination P register number, 0-15, placed at bits [3:0].
 */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm
1738c2ecf20Sopenharmony_ci
/*
 * __for - recursion helper for _for; not meant to be invoked directly.
 *
 * \from: first value of the range (inclusive).
 * \to:   last value of the range (inclusive).
 *
 * Expands _for__body once for every integer in [\from, \to], in ascending
 * order, by splitting the range in half and recursing on each half. The
 * '%' operator (evaluate the expression and substitute its value) requires
 * the caller to have enabled .altmacro mode.
 *
 * The recursion only bottoms out when \from == \to, so \from must not be
 * greater than \to.
 */
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm
1828c2ecf20Sopenharmony_ci
/*
 * _for - expand a statement once per integer in an inclusive range.
 *
 * \var:  name of the loop variable; \insn refers to the current value by
 *	  this name (for example "n" when \insn contains a backslash-n).
 * \from: first value (inclusive).
 * \to:   last value (inclusive).
 * \insn: the statement to expand for each value (rest of the line).
 *
 * A temporary macro, _for__body, is defined to wrap \insn and is purged
 * again at the end. __for runs in .altmacro mode so that it can evaluate
 * its range arithmetic; _for__body switches back to .noaltmacro around
 * \insn, then re-enables .altmacro for the remainder of the recursion.
 *
 * NOTE(review): the helper name is fixed, so nesting one _for inside
 * another presumably does not work - confirm before relying on it.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
1968c2ecf20Sopenharmony_ci
/*
 * sve_load_vq - update ZCR_EL1.LEN with the new VQ.
 *
 * \xvqminus1: X register holding the new LEN value (VQ - 1). It is ORed in
 *	       without masking, so it must not have bits set outside
 *	       ZCR_ELx_LEN_MASK.
 * \xtmp:      scratch X register; receives the current ZCR_EL1 value.
 * \xtmp2:     scratch X register; receives the updated ZCR_EL1 value.
 *
 * The system register is only written when the value actually changes.
 * Clobbers \xtmp, \xtmp2 and the condition flags (cmp).
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
		mrs_s		\xtmp, SYS_ZCR_EL1
		bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
		orr		\xtmp2, \xtmp2, \xvqminus1
		cmp		\xtmp2, \xtmp
		b.eq		921f
		msr_s		SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm
2078c2ecf20Sopenharmony_ci
/*
 * _sve_flush_z - preserve the first 128 bits of Z\nz and zero the rest.
 *
 * \nz: Z register number, 0-31.
 *
 * Implemented as a move of V\nz onto itself.
 */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm
2138c2ecf20Sopenharmony_ci
/*
 * sve_flush - reset the SVE-only part of the register state.
 *
 * Applies _sve_flush_z to z0-z31, sets p0-p15 to all-false with PFALSE,
 * and then writes p0 (all-false at that point) to the FFR.
 */
.macro sve_flush
 _for n, 0, 31, _sve_flush_z	\n
 _for n, 0, 15, _sve_pfalse	\n
		_sve_wrffr	0
.endm
2198c2ecf20Sopenharmony_ci
/*
 * sve_save - store the SVE register state, FPSR and FPCR to memory.
 *
 * \nxbase: number (not name) of the X register holding the base address of
 *	    the SVE save area.
 * \xpfpsr: X register holding the address where FPSR is stored; FPCR is
 *	    stored 4 bytes after it.
 * \nxtmp:  number (not name) of a scratch general-purpose register, used as
 *	    x\nxtmp and w\nxtmp.
 *
 * Slots relative to X\nxbase (all "MUL VL" offsets):
 *	z0-z31	vector slots    -34 .. -3
 *	p0-p15	predicate slots -16 .. -1
 *	FFR	predicate slot    0
 *
 * The FFR can only be read into a predicate register, so p0 is used as a
 * staging register after it has been saved and is reloaded from its own
 * slot afterwards. Clobbers x\nxtmp.
 */
.macro sve_save nxbase, xpfpsr, nxtmp
 _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
 _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		_sve_rdffr	0
		_sve_str_p	0, \nxbase
		_sve_ldr_p	0, \nxbase, -16

		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm
2328c2ecf20Sopenharmony_ci
/*
 * sve_load - load the SVE register state, FPSR and FPCR from memory.
 *
 * \nxbase:    number (not name) of the X register holding the base address
 *	       of the SVE save area.
 * \xpfpsr:    X register holding the address of the saved FPSR; the saved
 *	       FPCR is read from 4 bytes after it.
 * \xvqminus1: X register holding VQ - 1, passed to sve_load_vq.
 * \nxtmp:     number (not name) of a scratch general-purpose register, used
 *	       as x\nxtmp and w\nxtmp.
 * \xtmp2:     second scratch X register, passed to sve_load_vq.
 *
 * Reads the slots that sve_save writes. The vector length is programmed
 * first. The FFR image (predicate slot 0) is staged through p0 and written
 * with WRFFR before p0-p15 are loaded, so the staging value in p0 is
 * overwritten by the real p0.
 *
 * Clobbers x\nxtmp, \xtmp2 and the condition flags (via sve_load_vq).
 */
.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
		sve_load_vq	\xvqminus1, x\nxtmp, \xtmp2
 _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
		_sve_ldr_p	0, \nxbase
		_sve_wrffr	0
 _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16

		ldr		w\nxtmp, [\xpfpsr]
		msr		fpsr, x\nxtmp
		ldr		w\nxtmp, [\xpfpsr, #4]
		msr		fpcr, x\nxtmp
.endm
245