162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
262306a36Sopenharmony_ci#include <linux/linkage.h>
362306a36Sopenharmony_ci#include <asm/asmmacro.h>
462306a36Sopenharmony_ci#include <asm/core.h>
562306a36Sopenharmony_ci
/* XCHAL_NO_MUL is 1 only when none of the MUL16, MUL32 or MAC16 options
   is configured, and 0 otherwise.  */
662306a36Sopenharmony_ci#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16
762306a36Sopenharmony_ci#define XCHAL_NO_MUL 0
862306a36Sopenharmony_ci#else
962306a36Sopenharmony_ci#define XCHAL_NO_MUL 1
1062306a36Sopenharmony_ci#endif
1162306a36Sopenharmony_ci
/* __umulsidi3: unsigned 32 x 32 -> 64 bit multiply.
   The two 32-bit operands are read from a2 and a3; the 64-bit product is
   left in the wh:wl pair, which maps onto a2/a3 according to
   __XTENSA_EB__ (see the wh/wl defines below).  */
1262306a36Sopenharmony_ciENTRY(__umulsidi3)
1362306a36Sopenharmony_ci
	/* CALL0 ABI: open a 32-byte frame and spill a12-a15 at offsets
	   16..28; they are reloaded just before abi_ret(32).  */
1462306a36Sopenharmony_ci#ifdef __XTENSA_CALL0_ABI__
1562306a36Sopenharmony_ci	abi_entry(32)
1662306a36Sopenharmony_ci	s32i	a12, sp, 16
1762306a36Sopenharmony_ci	s32i	a13, sp, 20
1862306a36Sopenharmony_ci	s32i	a14, sp, 24
1962306a36Sopenharmony_ci	s32i	a15, sp, 28
2062306a36Sopenharmony_ci#elif XCHAL_NO_MUL
2162306a36Sopenharmony_ci	/* This is not really a leaf function; allocate enough stack space
2262306a36Sopenharmony_ci	   to allow CALL12s to a helper function.  */
2362306a36Sopenharmony_ci	abi_entry(32)
2462306a36Sopenharmony_ci#else
2562306a36Sopenharmony_ci	abi_entry_default
2662306a36Sopenharmony_ci#endif
2762306a36Sopenharmony_ci
	/* wh / wl name the registers that receive the high and low word of
	   the result; which of a2/a3 is which depends on __XTENSA_EB__.  */
2862306a36Sopenharmony_ci#ifdef __XTENSA_EB__
2962306a36Sopenharmony_ci#define wh a2
3062306a36Sopenharmony_ci#define wl a3
3162306a36Sopenharmony_ci#else
3262306a36Sopenharmony_ci#define wh a3
3362306a36Sopenharmony_ci#define wl a2
3462306a36Sopenharmony_ci#endif /* __XTENSA_EB__ */
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
3762306a36Sopenharmony_ci	   See more comments there.  */
3862306a36Sopenharmony_ci
	/* Hardware 32x32->64 path.  The low word goes through a6 because
	   wl aliases one of the inputs (a2 or a3) that muluh still has
	   to read.  */
3962306a36Sopenharmony_ci#if XCHAL_HAVE_MUL32_HIGH
4062306a36Sopenharmony_ci	mull	a6, a2, a3
4162306a36Sopenharmony_ci	muluh	wh, a2, a3
4262306a36Sopenharmony_ci	mov	wl, a6
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci#else /* ! MUL32_HIGH */
4562306a36Sopenharmony_ci
	/* Without MUL32_HIGH the product is assembled from four 16x16
	   partial products,
	     pp0 = a2.lo * a3.lo    pp1 = a2.lo * a3.hi
	     pp2 = a2.hi * a3.lo    pp3 = a2.hi * a3.hi
	   as pp0 + ((pp1 + pp2) << 16) + (pp3 << 32).  */
4662306a36Sopenharmony_ci#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
4762306a36Sopenharmony_ci	/* a0 and a8 will be clobbered by calling the multiply function
4862306a36Sopenharmony_ci	   but a8 is not used here and need not be saved.  */
4962306a36Sopenharmony_ci	s32i	a0, sp, 0
5062306a36Sopenharmony_ci#endif
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
5362306a36Sopenharmony_ci
	/* a2h/a3h (a4/a5) hold the high halves; a2l/a3l are a2/a3
	   themselves.  do_mul pastes the l/h suffix onto the register
	   name to select one of these.  */
5462306a36Sopenharmony_ci#define a2h a4
5562306a36Sopenharmony_ci#define a3h a5
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	/* Get the high halves of the inputs into registers.  */
5862306a36Sopenharmony_ci	srli	a2h, a2, 16
5962306a36Sopenharmony_ci	srli	a3h, a3, 16
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci#define a2l a2
6262306a36Sopenharmony_ci#define a3l a3
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
6562306a36Sopenharmony_ci	/* Clear the high halves of the inputs.  This does not matter
6662306a36Sopenharmony_ci	   for MUL16 because the high bits are ignored.  */
6762306a36Sopenharmony_ci	extui	a2, a2, 0, 16
6862306a36Sopenharmony_ci	extui	a3, a3, 0, 16
6962306a36Sopenharmony_ci#endif
7062306a36Sopenharmony_ci#endif /* MUL16 || MUL32 */
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci
	/* do_mul(dst, xreg, xhalf, yreg, yhalf): set dst to the product of
	   the xhalf ("l" or "h") 16-bit half of xreg and the yhalf half of
	   yreg.  Exactly one of the four definitions below is selected,
	   depending on the configured multiply hardware.  */
7362306a36Sopenharmony_ci#if XCHAL_HAVE_MUL16
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
7662306a36Sopenharmony_ci	mul16u	dst, xreg ## xhalf, yreg ## yhalf
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci#elif XCHAL_HAVE_MUL32
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
8162306a36Sopenharmony_ci	mull	dst, xreg ## xhalf, yreg ## yhalf
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci#elif XCHAL_HAVE_MAC16
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci/* The preprocessor insists on inserting a space when concatenating after
8662306a36Sopenharmony_ci   a period in the definition of do_mul below.  These macros are a workaround
8762306a36Sopenharmony_ci   using underscores instead of periods when doing the concatenation.  */
8862306a36Sopenharmony_ci#define umul_aa_ll umul.aa.ll
8962306a36Sopenharmony_ci#define umul_aa_lh umul.aa.lh
9062306a36Sopenharmony_ci#define umul_aa_hl umul.aa.hl
9162306a36Sopenharmony_ci#define umul_aa_hh umul.aa.hh
9262306a36Sopenharmony_ci
	/* MAC16: the half selection is encoded in the mnemonic and the
	   result is read back from ACCLO.  */
9362306a36Sopenharmony_ci#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
9462306a36Sopenharmony_ci	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
9562306a36Sopenharmony_ci	rsr	dst, ACCLO
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci#else /* no multiply hardware */
9862306a36Sopenharmony_ci
	/* set_arg_l / set_arg_h copy the low / high 16 bits of src into
	   dst, zero-extended, as an argument for .Lmul_mulsi3.  */
9962306a36Sopenharmony_ci#define set_arg_l(dst, src) \
10062306a36Sopenharmony_ci	extui	dst, src, 0, 16
10162306a36Sopenharmony_ci#define set_arg_h(dst, src) \
10262306a36Sopenharmony_ci	srli	dst, src, 16
10362306a36Sopenharmony_ci
	/* Software multiply: CALL0 passes the halves in a13/a14 and reads
	   the result from a12; the CALL12 variant passes them in a14/a15
	   and reads the result from a14.  */
10462306a36Sopenharmony_ci#ifdef __XTENSA_CALL0_ABI__
10562306a36Sopenharmony_ci#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
10662306a36Sopenharmony_ci	set_arg_ ## xhalf (a13, xreg); \
10762306a36Sopenharmony_ci	set_arg_ ## yhalf (a14, yreg); \
10862306a36Sopenharmony_ci	call0	.Lmul_mulsi3; \
10962306a36Sopenharmony_ci	mov	dst, a12
11062306a36Sopenharmony_ci#else
11162306a36Sopenharmony_ci#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
11262306a36Sopenharmony_ci	set_arg_ ## xhalf (a14, xreg); \
11362306a36Sopenharmony_ci	set_arg_ ## yhalf (a15, yreg); \
11462306a36Sopenharmony_ci	call12	.Lmul_mulsi3; \
11562306a36Sopenharmony_ci	mov	dst, a14
11662306a36Sopenharmony_ci#endif /* __XTENSA_CALL0_ABI__ */
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci#endif /* no multiply hardware */
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
12162306a36Sopenharmony_ci	do_mul(a6, a2, l, a3, h)	/* pp 1 */
12262306a36Sopenharmony_ci	do_mul(a11, a2, h, a3, l)	/* pp 2 */
12362306a36Sopenharmony_ci	movi	a9, 0
12462306a36Sopenharmony_ci	add	a6, a6, a11
	/* An unsigned sum smaller than one of its addends means the add
	   wrapped; record that carry in a9.  */
12562306a36Sopenharmony_ci	bgeu	a6, a11, 1f
12662306a36Sopenharmony_ci	addi	a9, a9, 1
12762306a36Sopenharmony_ci1:
12862306a36Sopenharmony_ci	/* Shift the high half of a9/a6 into position in a9.  Note that
12962306a36Sopenharmony_ci	   this value can be safely incremented without any carry-outs.  */
13062306a36Sopenharmony_ci	ssai	16
13162306a36Sopenharmony_ci	src	a9, a9, a6
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	/* Compute the low word into a6.  */
13462306a36Sopenharmony_ci	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	/* sll takes its shift amount from SAR, still as programmed by the
	   ssai 16 above, so the low 16 bits of pp1 + pp2 move into the
	   upper half of a6 before pp0 is added.  NOTE(review): this relies
	   on the do_mul expansion in between leaving SAR unchanged --
	   confirm for the CALL0/CALL12 helper paths.  */
13562306a36Sopenharmony_ci	sll	a6, a6
13662306a36Sopenharmony_ci	add	a6, a6, a11
	/* Same wrap test as above: a carry out of the low word is added
	   to the pending high-word contribution in a9.  */
13762306a36Sopenharmony_ci	bgeu	a6, a11, 1f
13862306a36Sopenharmony_ci	addi	a9, a9, 1
13962306a36Sopenharmony_ci1:
14062306a36Sopenharmony_ci	/* Compute the high word into wh.  */
	/* wh is a2 or a3.  This is the last partial product, and every
	   do_mul variant reads its inputs before writing dst, so
	   overwriting that input here is harmless.  */
14162306a36Sopenharmony_ci	do_mul(wh, a2, h, a3, h)	/* pp 3 */
14262306a36Sopenharmony_ci	add	wh, wh, a9
14362306a36Sopenharmony_ci	mov	wl, a6
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci#endif /* !MUL32_HIGH */
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
14862306a36Sopenharmony_ci	/* Restore the original return address.  */
14962306a36Sopenharmony_ci	l32i	a0, sp, 0
15062306a36Sopenharmony_ci#endif
	/* CALL0: reload the registers spilled at entry and release the
	   32-byte frame; otherwise use the default return sequence.  */
15162306a36Sopenharmony_ci#ifdef __XTENSA_CALL0_ABI__
15262306a36Sopenharmony_ci	l32i	a12, sp, 16
15362306a36Sopenharmony_ci	l32i	a13, sp, 20
15462306a36Sopenharmony_ci	l32i	a14, sp, 24
15562306a36Sopenharmony_ci	l32i	a15, sp, 28
15662306a36Sopenharmony_ci	abi_ret(32)
15762306a36Sopenharmony_ci#else
15862306a36Sopenharmony_ci	abi_ret_default
15962306a36Sopenharmony_ci#endif
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci#if XCHAL_NO_MUL
16262306a36Sopenharmony_ci
/* do_addx2: dst = (as << 1) + at.
   Uses addx2 when XCHAL_HAVE_ADDX is set.  Otherwise the shifted value is
   built in tmp first, so tmp is written only in that fallback and must
   not be the same register as at (it may be the same as dst).  */
16362306a36Sopenharmony_ci	.macro	do_addx2 dst, as, at, tmp
16462306a36Sopenharmony_ci#if XCHAL_HAVE_ADDX
16562306a36Sopenharmony_ci	addx2	\dst, \as, \at
16662306a36Sopenharmony_ci#else
16762306a36Sopenharmony_ci	slli	\tmp, \as, 1
16862306a36Sopenharmony_ci	add	\dst, \tmp, \at
16962306a36Sopenharmony_ci#endif
17062306a36Sopenharmony_ci	.endm
17162306a36Sopenharmony_ci
/* do_addx4: dst = (as << 2) + at.
   Uses addx4 when XCHAL_HAVE_ADDX is set.  Otherwise the shifted value is
   built in tmp first, so tmp is written only in that fallback and must
   not be the same register as at (it may be the same as dst).  */
17262306a36Sopenharmony_ci	.macro	do_addx4 dst, as, at, tmp
17362306a36Sopenharmony_ci#if XCHAL_HAVE_ADDX
17462306a36Sopenharmony_ci	addx4	\dst, \as, \at
17562306a36Sopenharmony_ci#else
17662306a36Sopenharmony_ci	slli	\tmp, \as, 2
17762306a36Sopenharmony_ci	add	\dst, \tmp, \at
17862306a36Sopenharmony_ci#endif
17962306a36Sopenharmony_ci	.endm
18062306a36Sopenharmony_ci
/* do_addx8: dst = (as << 3) + at.
   Uses addx8 when XCHAL_HAVE_ADDX is set.  Otherwise the shifted value is
   built in tmp first, so tmp is written only in that fallback and must
   not be the same register as at (it may be the same as dst).  */
18162306a36Sopenharmony_ci	.macro	do_addx8 dst, as, at, tmp
18262306a36Sopenharmony_ci#if XCHAL_HAVE_ADDX
18362306a36Sopenharmony_ci	addx8	\dst, \as, \at
18462306a36Sopenharmony_ci#else
18562306a36Sopenharmony_ci	slli	\tmp, \as, 3
18662306a36Sopenharmony_ci	add	\dst, \tmp, \at
18762306a36Sopenharmony_ci#endif
18862306a36Sopenharmony_ci	.endm
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	/* For Xtensa processors with no multiply hardware, this simplified
19162306a36Sopenharmony_ci	   version of _mulsi3 is used for multiplying 16-bit chunks of
19262306a36Sopenharmony_ci	   the floating-point mantissas.  When using CALL0, this function
19362306a36Sopenharmony_ci	   uses a custom ABI: the inputs are passed in a13 and a14, the
19462306a36Sopenharmony_ci	   result is returned in a12, and a8 and a15 are clobbered.  */
	/* NOTE(review): the "floating-point mantissas" wording appears to be
	   inherited from the routine this code was taken from; within this
	   file the helper is called from do_mul, which passes 16-bit halves
	   of the __umulsidi3 operands.  */
19562306a36Sopenharmony_ci	.align	4
19662306a36Sopenharmony_ci.Lmul_mulsi3:
19762306a36Sopenharmony_ci	abi_entry_default
19862306a36Sopenharmony_ci
	/* mul_mulsi3_body: shift-and-add multiply, dst = src1 * src2.
	   Each pass of the loop consumes four bits of src1: for bit k
	   (k = 0..3) the candidate dst + (src2 << k) is formed in tmp1 and
	   committed to dst by movnez only when that bit, extracted into
	   tmp2, is set.  src1 and src2 are modified (shifted) as the loop
	   runs; tmp1 and tmp2 are scratch.  */
19962306a36Sopenharmony_ci	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
20062306a36Sopenharmony_ci	movi	\dst, 0
20162306a36Sopenharmony_ci1:	add	\tmp1, \src2, \dst
20262306a36Sopenharmony_ci	extui	\tmp2, \src1, 0, 1
20362306a36Sopenharmony_ci	movnez	\dst, \tmp1, \tmp2
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	do_addx2 \tmp1, \src2, \dst, \tmp1
20662306a36Sopenharmony_ci	extui	\tmp2, \src1, 1, 1
20762306a36Sopenharmony_ci	movnez	\dst, \tmp1, \tmp2
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci	do_addx4 \tmp1, \src2, \dst, \tmp1
21062306a36Sopenharmony_ci	extui	\tmp2, \src1, 2, 1
21162306a36Sopenharmony_ci	movnez	\dst, \tmp1, \tmp2
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	do_addx8 \tmp1, \src2, \dst, \tmp1
21462306a36Sopenharmony_ci	extui	\tmp2, \src1, 3, 1
21562306a36Sopenharmony_ci	movnez	\dst, \tmp1, \tmp2
21662306a36Sopenharmony_ci
	/* Advance both operands by four bit positions and loop while any
	   set bits remain in src1.  */
21762306a36Sopenharmony_ci	srli	\src1, \src1, 4
21862306a36Sopenharmony_ci	slli	\src2, \src2, 4
21962306a36Sopenharmony_ci	bnez	\src1, 1b
22062306a36Sopenharmony_ci	.endm
22162306a36Sopenharmony_ci
	/* CALL0 uses the custom register convention described above;
	   otherwise the operands are taken from a2 and a3 and the result
	   is produced in a2, with a4-a6 as scratch.  */
22262306a36Sopenharmony_ci#ifdef __XTENSA_CALL0_ABI__
22362306a36Sopenharmony_ci	mul_mulsi3_body a12, a13, a14, a15, a8
22462306a36Sopenharmony_ci#else
22562306a36Sopenharmony_ci	/* The result will be written into a2, so save that argument in a4.  */
22662306a36Sopenharmony_ci	mov	a4, a2
22762306a36Sopenharmony_ci	mul_mulsi3_body a2, a4, a3, a5, a6
22862306a36Sopenharmony_ci#endif
22962306a36Sopenharmony_ci	abi_ret_default
23062306a36Sopenharmony_ci#endif /* XCHAL_NO_MUL */
23162306a36Sopenharmony_ci
/* Matches the ENTRY(__umulsidi3) at the top of the routine; the
   .Lmul_mulsi3 helper, when built, sits between the two.  */
23262306a36Sopenharmony_ciENDPROC(__umulsidi3)
23362306a36Sopenharmony_ciEXPORT_SYMBOL(__umulsidi3)
234