/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * do_addx2 / do_addx4 / do_addx8  dst, as, at, tmp
 *
 *   dst = (as << N) + at		for N = 1, 2, 3
 *
 * When XCHAL_HAVE_ADDX is set, a single addxN instruction is emitted and
 * \tmp is not touched.  Otherwise the shift is done into \tmp first and
 * then added to \at, so \tmp is clobbered.
 *
 * In the fallback path \tmp is written before \at is read, so \tmp must
 * not be the same register as \at.  \tmp may be the same register as
 * \dst (every use below passes a7 for both).
 */
	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

/*
 * __mulsi3 - 32-bit integer multiply.
 *
 * In:       a2, a3 = the two factors
 * Out:      a2     = low 32 bits of a2 * a3
 * Scratch:  a4-a7 (which of them are written depends on the path taken);
 *           the MAC16 path also writes the accumulator, read via ACCLO.
 *
 * One of four implementations is selected at build time:
 *   XCHAL_HAVE_MUL32  - a single mull instruction
 *   XCHAL_HAVE_MUL16  - 16x16 partial products
 *   XCHAL_HAVE_MAC16  - partial products through the MAC16 accumulator
 *   otherwise         - shift-and-add, four multiplier bits per pass
 *
 * NOTE(review): ENTRY/ENDPROC/EXPORT_SYMBOL, abi_entry_default,
 * abi_ret_default and do_abs are not defined in this file; they are
 * expected to come from the headers included above - confirm.
 */
ENTRY(__mulsi3)

	abi_entry_default

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	/* Fast path: neither factor has a bit set in [31:16]. */
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	abi_ret_default

.LMUL16:
	/*
	 * result = ((hi(a2) * lo(a3) + hi(a3) * lo(a2)) << 16)
	 *          + lo(a2) * lo(a3)
	 * The hi * hi term only contributes to bits 32 and above, so it
	 * is not computed.
	 */
	srai	a4, a2, 16		/* a4 = hi(a2) */
	srai	a5, a3, 16		/* a5 = hi(a3) */
	mul16u	a7, a4, a3		/* a7 = hi(a2) * lo(a3) */
	mul16u	a6, a5, a2		/* a6 = hi(a3) * lo(a2) */
	mul16u	a4, a2, a3		/* a4 = lo(a2) * lo(a3) */
	add	a7, a7, a6		/* sum of the cross terms */
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	/*
	 * Same decomposition as the MUL16 case: the two cross terms are
	 * accumulated and read back from ACCLO into a5, the low * low
	 * term is read back into a4, then a2 = a4 + (a5 << 16).
	 */
	mul.aa.hl	a2, a3
	mula.aa.lh	a2, a3
	rsr	a5, ACCLO
	umul.aa.ll	a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/*
	 * Multiply one bit at a time, but unroll the loop 4x to better
	 * exploit the addx instructions and avoid overhead.
	 * Peel the first iteration to save a cycle on init.
	 *
	 * Register roles in this path:
	 *   a2 = running sum (the result)
	 *   a3 = multiplier, the smaller magnitude; consumed 4 bits per pass
	 *   a4 = multiplicand, the larger magnitude; shifted left 4 per pass
	 *   a5 = a2 ^ a3 of the original inputs; only its sign is used
	 *   a6 = the multiplier bit currently being tested
	 *   a7 = candidate new sum for the bit being tested
	 */

	/* Avoid negative numbers. */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative. */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller. */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	/* Peeled first pass: multiplier bits 0..3, starting from sum = 0. */
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6	/* bit 0 set: sum = a4 */

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6	/* bit 1 set: sum += a4 * 2 */

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6	/* bit 2 set: sum += a4 * 4 */

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6	/* bit 3 set: sum += a4 * 8 */

	/* Multiplier bits above bit 3 remain: go process them. */
	bgeui	a3, 16, .Lmult_main_loop

	/* Done: negate the sum if exactly one input was negative. */
	neg	a3, a2
	movltz	a2, a3, a5
	abi_ret_default

	.align	4
.Lmult_main_loop:
	/* Advance to the next group of four multiplier bits. */
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	/* Negate the sum if exactly one input was negative. */
	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	abi_ret_default

ENDPROC(__mulsi3)
EXPORT_SYMBOL(__mulsi3)