18c2ecf20Sopenharmony_ci;; SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci;;  Copyright 2010  Free Software Foundation, Inc.
38c2ecf20Sopenharmony_ci;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
48c2ecf20Sopenharmony_ci;;
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/linkage.h>
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci	;; ABI considerations for the divide functions
98c2ecf20Sopenharmony_ci	;; The following registers are call-used:
108c2ecf20Sopenharmony_ci	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
118c2ecf20Sopenharmony_ci	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
128c2ecf20Sopenharmony_ci	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
138c2ecf20Sopenharmony_ci	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
148c2ecf20Sopenharmony_ci	;;
158c2ecf20Sopenharmony_ci	;; In our implementation, divu and remu are leaf functions,
168c2ecf20Sopenharmony_ci	;; while both divi and remi call into divu.
178c2ecf20Sopenharmony_ci	;; A0 is not clobbered by any of the functions.
188c2ecf20Sopenharmony_ci	;; divu does not clobber B2 either, which is taken advantage of
198c2ecf20Sopenharmony_ci	;; in remi.
208c2ecf20Sopenharmony_ci	;; divi uses B5 to hold the original return address during
218c2ecf20Sopenharmony_ci	;; the call to divu.
228c2ecf20Sopenharmony_ci	;; remi uses B2 and A5 to hold the input values during the
238c2ecf20Sopenharmony_ci	;; call to divu.  It stores B3 on the stack.
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci	.text
268c2ecf20Sopenharmony_ciENTRY(__c6xabi_divu)
	;; Unsigned divide.  As used below: A4 holds the dividend on
	;; entry and the quotient on return, B4 holds the divisor, and
	;; B3 holds the return address.  A1, A2, A6, B0 and B1 are used
	;; as scratch; B4 is modified.
	;;
278c2ecf20Sopenharmony_ci	;; We use a series of up to 31 subc instructions.  First, we find
288c2ecf20Sopenharmony_ci	;; out how many leading zero bits there are in the divisor.  This
298c2ecf20Sopenharmony_ci	;; gives us both a shift count for aligning (shifting) the divisor
308c2ecf20Sopenharmony_ci	;; to the top bit, and the number of times we have to execute subc.
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	;; At the end, we have both the remainder and most of the quotient
338c2ecf20Sopenharmony_ci	;; in A4.  The top bit of the quotient is computed first and is
348c2ecf20Sopenharmony_ci	;; placed in A2.
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci	;; Return immediately if the dividend is zero.
378c2ecf20Sopenharmony_ci	 mv	.s2x	A4, B1	; B1 = dividend, tested for zero in the next packet
388c2ecf20Sopenharmony_ci   [B1]	 lmbd	.l2	1, B4, B1	; B1 = leading zero count of the divisor
398c2ecf20Sopenharmony_ci|| [!B1] b	.s2	B3	; RETURN A (dividend is zero)
408c2ecf20Sopenharmony_ci|| [!B1] mvk	.d2	1, B4	; zero dividend: divisor = 1 so the compare below skips the sub
418c2ecf20Sopenharmony_ci	 mv	.l1x	B1, A6	; A6 = shift count, kept for the final quotient extraction
428c2ecf20Sopenharmony_ci||	 shl	.s2	B4, B1, B4	; B4 = divisor shifted left by the shift count
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci	;; The loop performs a maximum of 28 steps, so we do the
458c2ecf20Sopenharmony_ci	;; first 3 here.
468c2ecf20Sopenharmony_ci	 cmpltu	.l1x	A4, B4, A2	; A2 = (dividend < aligned divisor)
478c2ecf20Sopenharmony_ci   [!A2] sub	.l1x	A4, B4, A4	; subtract only when the aligned divisor fits
488c2ecf20Sopenharmony_ci||	 shru	.s2	B4, 1, B4	; B4 = aligned divisor >> 1, used by every subc below
498c2ecf20Sopenharmony_ci||	 xor	.s1	1, A2, A2	; A2 = top quotient bit (1 if the subtract was done)
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci	 shl	.s1	A2, 31, A2	; park the top quotient bit in bit 31 for the final or
528c2ecf20Sopenharmony_ci|| [B1]	 subc	.l1x	A4,B4,A4	; one conditional-subtract step, only while B1 != 0
538c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1	; B1 = number of subc steps still to do
548c2ecf20Sopenharmony_ci   [B1]	 subc	.l1x	A4,B4,A4
558c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	;; RETURN A may happen here (note: must happen before the next branch)
588c2ecf20Sopenharmony_ci_divu_loop:
598c2ecf20Sopenharmony_ci	 cmpgt	.l2	B1, 7, B0	; B0 = (B1 > 7): another pass of the loop is needed
608c2ecf20Sopenharmony_ci|| [B1]	 subc	.l1x	A4,B4,A4	; each pass holds seven guarded subc steps
618c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
628c2ecf20Sopenharmony_ci   [B1]	 subc	.l1x	A4,B4,A4
638c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
648c2ecf20Sopenharmony_ci|| [B0]  b	.s1	_divu_loop	; takes effect after the five packets below (see marker)
658c2ecf20Sopenharmony_ci   [B1]	 subc	.l1x	A4,B4,A4
668c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
678c2ecf20Sopenharmony_ci   [B1]	 subc	.l1x	A4,B4,A4
688c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
698c2ecf20Sopenharmony_ci   [B1]	 subc	.l1x	A4,B4,A4
708c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
718c2ecf20Sopenharmony_ci   [B1]	 subc	.l1x	A4,B4,A4
728c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
738c2ecf20Sopenharmony_ci   [B1]	 subc	.l1x	A4,B4,A4
748c2ecf20Sopenharmony_ci|| [B1]	 add	.s2	-1, B1, B1
758c2ecf20Sopenharmony_ci	;; loop backwards branch happens here
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	 ret	.s2	B3	; return; the quotient fix-up below runs before it takes effect
788c2ecf20Sopenharmony_ci||	 mvk	.s1	32, A1
798c2ecf20Sopenharmony_ci	 sub	.l1	A1, A6, A6	; A6 = 32 - shift count
808c2ecf20Sopenharmony_ci	 shl	.s1	A4, A6, A4	; shift the remainder out; quotient bits now at the top
818c2ecf20Sopenharmony_ci	 shru	.s1	A4, 1, A4	; clear bit 31 to make room for the top quotient bit
828c2ecf20Sopenharmony_ci||	 sub	.l1	A6, 1, A6	; one less, to account for the shift just done
838c2ecf20Sopenharmony_ci	 or	.l1	A2, A4, A4	; merge in the top quotient bit held in A2
848c2ecf20Sopenharmony_ci	 shru	.s1	A4, A6, A4	; right-justify: A4 = quotient
858c2ecf20Sopenharmony_ci	 nop
868c2ecf20Sopenharmony_ciENDPROC(__c6xabi_divu)
87