/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

662306a36Sopenharmony_ci	.macro	do_addx2 dst, as, at, tmp
762306a36Sopenharmony_ci#if XCHAL_HAVE_ADDX
862306a36Sopenharmony_ci	addx2	\dst, \as, \at
962306a36Sopenharmony_ci#else
1062306a36Sopenharmony_ci	slli	\tmp, \as, 1
1162306a36Sopenharmony_ci	add	\dst, \tmp, \at
1262306a36Sopenharmony_ci#endif
1362306a36Sopenharmony_ci	.endm
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci	.macro	do_addx4 dst, as, at, tmp
1662306a36Sopenharmony_ci#if XCHAL_HAVE_ADDX
1762306a36Sopenharmony_ci	addx4	\dst, \as, \at
1862306a36Sopenharmony_ci#else
1962306a36Sopenharmony_ci	slli	\tmp, \as, 2
2062306a36Sopenharmony_ci	add	\dst, \tmp, \at
2162306a36Sopenharmony_ci#endif
2262306a36Sopenharmony_ci	.endm
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci	.macro	do_addx8 dst, as, at, tmp
2562306a36Sopenharmony_ci#if XCHAL_HAVE_ADDX
2662306a36Sopenharmony_ci	addx8	\dst, \as, \at
2762306a36Sopenharmony_ci#else
2862306a36Sopenharmony_ci	slli	\tmp, \as, 3
2962306a36Sopenharmony_ci	add	\dst, \tmp, \at
3062306a36Sopenharmony_ci#endif
3162306a36Sopenharmony_ci	.endm
3262306a36Sopenharmony_ci
/*
 * __mulsi3: 32-bit integer multiply.
 *
 * In:   a2, a3 = operands
 * Out:  a2     = low 32 bits of a2 * a3
 * Uses: a4-a7 as scratch (depending on the configuration path);
 *       the MAC16 path also overwrites the MAC16 accumulator.
 *
 * Exactly one of four implementations is assembled, selected by the
 * core configuration macros: MUL32, MUL16, MAC16, or a generic
 * shift-and-add loop when none of those options is available.
 */
ENTRY(__mulsi3)

	abi_entry_default

#if XCHAL_HAVE_MUL32
	/* Hardware 32x32 multiply: one instruction.  */
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	/* Fast path: if bits 16..31 of both operands are clear, a single
	   unsigned 16x16 multiply yields the whole result.  */
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	abi_ret_default
.LMUL16:
	/* General case, with hi/lo the upper/lower 16-bit halves:
	     result = ((hi(a2) * lo(a3) + hi(a3) * lo(a2)) << 16)
		      + lo(a2) * lo(a3)
	   The hi * hi term lies entirely above bit 31 and is dropped.  */
	srai	a4, a2, 16	/* a4 = hi(a2) */
	srai	a5, a3, 16	/* a5 = hi(a3) */
	mul16u	a7, a4, a3	/* a7 = hi(a2) * lo(a3) */
	mul16u	a6, a5, a2	/* a6 = hi(a3) * lo(a2) */
	mul16u	a4, a2, a3	/* a4 = lo(a2) * lo(a3) */
	add	a7, a7, a6	/* Sum of the two cross products.  */
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	/* Same decomposition as the MUL16 general case, using the MAC16
	   accumulator: accumulate both cross products, then compute the
	   unsigned low x low product separately.  */
	mul.aa.hl a2, a3	/* ACC  = hi(a2) * lo(a3) */
	mula.aa.lh a2, a3	/* ACC += lo(a2) * hi(a3) */
	rsr	a5, ACCLO	/* a5 = cross-product sum (low 32 bits) */
	umul.aa.ll a2, a3	/* ACC  = lo(a2) * lo(a3), unsigned */
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */

	/* Avoid negative numbers: work on magnitudes and fix up the sign
	   of the result at the end.  (do_abs is not defined in this
	   file.)  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller; the number of loop
	   iterations depends on the magnitude of a3.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	/* Peeled first iteration: a2 accumulates the product, starting
	   from 0.  For each of the low four bits of a3, compute the
	   candidate sum in a7 and commit it to a2 only if the bit is
	   set.  */
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6	/* Bit 0: a2 = a4.  */

	do_addx2 a7, a4, a2, a7	/* Bit 1: a7 = a2 + 2 * a4.  */
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7	/* Bit 2: a7 = a2 + 4 * a4.  */
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7	/* Bit 3: a7 = a2 + 8 * a4.  */
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	/* More multiplier bits remain iff a3 >= 16 (unsigned).  */
	bgeui	a3, 16, .Lmult_main_loop
	/* Done: negate the result if the input signs differed.  */
	neg	a3, a2
	movltz	a2, a3, a5
	abi_ret_default

	.align	4
.Lmult_main_loop:
	/* Consume the next four multiplier bits: drop the four bits just
	   handled from a3 and scale the multiplicand in a4 by 16.  */
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2	/* Bit 0: a7 = a2 + a4.  */
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7	/* Bit 1: a7 = a2 + 2 * a4.  */
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7	/* Bit 2: a7 = a2 + 4 * a4.  */
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7	/* Bit 3: a7 = a2 + 8 * a4.  */
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	/* Negate the result if the input signs differed.  */
	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	abi_ret_default

ENDPROC(__mulsi3)
EXPORT_SYMBOL(__mulsi3)