18c2ecf20Sopenharmony_ci;; SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci;;  Copyright (C) 2010 Texas Instruments Incorporated
38c2ecf20Sopenharmony_ci;;  Contributed by Mark Salter <msalter@redhat.com>.
48c2ecf20Sopenharmony_ci;;
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/linkage.h>
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci	;; uint64_t __c6xabi_mpyll(uint64_t x, uint64_t y)
98c2ecf20Sopenharmony_ci	;;
108c2ecf20Sopenharmony_ci	;; 64x64 multiply
118c2ecf20Sopenharmony_ci	;; First compute partial results using 32-bit parts of x and y:
128c2ecf20Sopenharmony_ci	;;
138c2ecf20Sopenharmony_ci	;;   b63	 b32 b31	  b0
148c2ecf20Sopenharmony_ci	;;    -----------------------------
158c2ecf20Sopenharmony_ci	;;    |      1	    |	   0	  |
168c2ecf20Sopenharmony_ci	;;    -----------------------------
178c2ecf20Sopenharmony_ci	;;
188c2ecf20Sopenharmony_ci	;;   P0 = X0*Y0
198c2ecf20Sopenharmony_ci	;;   P1 = X0*Y1 + X1*Y0
208c2ecf20Sopenharmony_ci	;;   P2 = X1*Y1
218c2ecf20Sopenharmony_ci	;;
228c2ecf20Sopenharmony_ci	;;   result = (P2 << 64) + (P1 << 32) + P0
238c2ecf20Sopenharmony_ci	;;
248c2ecf20Sopenharmony_ci	;; Since the result is also 64-bit, we can skip the P2 term.
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci	.text
278c2ecf20Sopenharmony_ciENTRY(__c6xabi_mpyll)
	;; Register roles, as used by the instructions below:
	;;   In:   A4 = X0 (low word of x),  A5 = X1 (high word of x)
	;;         B4 = Y0 (low word of y),  B5 = Y1 (high word of y)
	;;   Out:  A4 = low word of result,  A5 = high word of result
	;;   Also written: A0, A1, A2, A3, B0, B1
	;;
	;; NOTE(review): the instruction order is timing-sensitive.  The
	;; instructions after the branch appear to sit in its delay slots,
	;; and the nops appear to cover the multiply latency -- confirm the
	;; delay-slot counts against the CPU reference before reordering,
	;; inserting or removing anything here.
288c2ecf20Sopenharmony_ci	mpy32u	.m1x	A4,B4,A1:A0	; X0*Y0
298c2ecf20Sopenharmony_ci	b	.s2	B3			; branch to address in B3 (presumably the caller's return address -- confirm against the ABI)
308c2ecf20Sopenharmony_ci ||	mpy32u	.m2x	B5,A4,B1:B0	; X0*Y1 (don't need upper 32-bits)
318c2ecf20Sopenharmony_ci ||	mpy32u	.m1x	A5,B4,A3:A2	; X1*Y0 (don't need upper 32-bits)
328c2ecf20Sopenharmony_ci	nop				; padding while the multiplies complete
338c2ecf20Sopenharmony_ci	nop				; padding while the multiplies complete
348c2ecf20Sopenharmony_ci	mv	.s1	A0,A4		; result low word = low word of X0*Y0
358c2ecf20Sopenharmony_ci	add	.l1x	A2,B0,A5	; A5 = low(X1*Y0) + low(X0*Y1), i.e. low word of P1
368c2ecf20Sopenharmony_ci	add	.s1	A1,A5,A5	; result high word = high(X0*Y0) + low word of P1
378c2ecf20Sopenharmony_ciENDPROC(__c6xabi_mpyll)
38