18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2000 Hewlett-Packard Co
48c2ecf20Sopenharmony_ci * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * 32-bit integer division.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * This code is based on the application note entitled "Divide, Square Root
98c2ecf20Sopenharmony_ci * and Remainder Algorithms for the IA-64 Architecture".  This document
108c2ecf20Sopenharmony_ci * is available as Intel document number 248725-002 or via the web at
118c2ecf20Sopenharmony_ci * http://developer.intel.com/software/opensource/numerics/
128c2ecf20Sopenharmony_ci *
138c2ecf20Sopenharmony_ci * For more details on the theory behind these algorithms, see "IA-64
148c2ecf20Sopenharmony_ci * and Elementary Functions" by Peter Markstein; HP Professional Books
158c2ecf20Sopenharmony_ci * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
168c2ecf20Sopenharmony_ci */
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ci#include <asm/asmmacro.h>
198c2ecf20Sopenharmony_ci#include <asm/export.h>
208c2ecf20Sopenharmony_ci
/*
 * Operation selector: OP is "mod" when MODULO is defined and "div"
 * otherwise.  It is only used to build the routine name (see NAME).
 */
218c2ecf20Sopenharmony_ci#ifdef MODULO
228c2ecf20Sopenharmony_ci# define OP	mod
238c2ecf20Sopenharmony_ci#else
248c2ecf20Sopenharmony_ci# define OP	div
258c2ecf20Sopenharmony_ci#endif
268c2ecf20Sopenharmony_ci
/*
 * Signedness selector.  With UNSIGNED defined the operands are extended
 * with zxt4 and the unsigned fcvt forms are used; otherwise they are
 * extended with sxt4 and the signed fcvt forms are used.
 *
 *   SGN             - "u" or empty; pasted into the routine name
 *   EXTEND          - mnemonic used to widen the 32-bit inputs
 *   INT_TO_FP(a,b)  - expands to the fcvt that converts integer b into a
 *   FP_TO_INT(a,b)  - expands to the truncating fcvt of b into a
 */
278c2ecf20Sopenharmony_ci#ifdef UNSIGNED
288c2ecf20Sopenharmony_ci# define SGN	u
298c2ecf20Sopenharmony_ci# define EXTEND	zxt4
308c2ecf20Sopenharmony_ci# define INT_TO_FP(a,b)	fcvt.xuf.s1 a=b
318c2ecf20Sopenharmony_ci# define FP_TO_INT(a,b)	fcvt.fxu.trunc.s1 a=b
328c2ecf20Sopenharmony_ci#else
338c2ecf20Sopenharmony_ci# define SGN
348c2ecf20Sopenharmony_ci# define EXTEND	sxt4
358c2ecf20Sopenharmony_ci# define INT_TO_FP(a,b)	fcvt.xf a=b
368c2ecf20Sopenharmony_ci# define FP_TO_INT(a,b)	fcvt.fx.trunc.s1 a=b
378c2ecf20Sopenharmony_ci#endif
388c2ecf20Sopenharmony_ci
/*
 * PASTE() goes through PASTE1() so that its arguments are macro-expanded
 * before ## joins them.  NAME therefore expands to one of __divsi3,
 * __udivsi3, __modsi3 or __umodsi3, depending on MODULO and UNSIGNED.
 */
398c2ecf20Sopenharmony_ci#define PASTE1(a,b)	a##b
408c2ecf20Sopenharmony_ci#define PASTE(a,b)	PASTE1(a,b)
418c2ecf20Sopenharmony_ci#define NAME		PASTE(PASTE(__,SGN),PASTE(OP,si3))
428c2ecf20Sopenharmony_ci
/*
 * NAME: 32-bit integer division, or remainder when MODULO is defined.
 *
 * In:	in0 = a (dividend), in1 = b (divisor).  Both are first widened
 *	from 32 bits with EXTEND (sxt4 or zxt4).
 * Out:	r8 = trunc(a / b); with MODULO, r8 = a - trunc(a / b) * b.
 *
 * Registers written: r2, r8, in0, in1, f6-f9, p6.
 *
 * Method: both operands are converted to floating point, frcpa gives a
 * first approximation y0 of 1/b, and three predicated multiply-add steps
 * refine q0 = a*y0 into q2, which is truncated back to an integer.  With
 * MODULO the remainder is then formed by an integer multiply-add.
 *
 * The code is hand-scheduled: each ";;" is an instruction-group stop, and
 * the register reuse in the MODULO path relies on where the stops are.
 */
438c2ecf20Sopenharmony_ciGLOBAL_ENTRY(NAME)
	// Register stack frame: 2 inputs (in0, in1), nothing else.
448c2ecf20Sopenharmony_ci	.regstk 2,0,0,0
458c2ecf20Sopenharmony_ci	// Transfer inputs to FP registers.
468c2ecf20Sopenharmony_ci	mov r2 = 0xffdd			// r2 = -34 + 65535 (fp reg format bias)
478c2ecf20Sopenharmony_ci	EXTEND in0 = in0		// in0 = a
488c2ecf20Sopenharmony_ci	EXTEND in1 = in1		// in1 = b
498c2ecf20Sopenharmony_ci	;;
508c2ecf20Sopenharmony_ci	setf.sig f8 = in0
518c2ecf20Sopenharmony_ci	setf.sig f9 = in1
	// b has already been copied to f9 above, so in1 can be negated in
	// place; the MODULO path reloads it into f9 later for the xma.l.
528c2ecf20Sopenharmony_ci#ifdef MODULO
538c2ecf20Sopenharmony_ci	sub in1 = r0, in1		// in1 = -b
548c2ecf20Sopenharmony_ci#endif
558c2ecf20Sopenharmony_ci	;;
568c2ecf20Sopenharmony_ci	// Convert the inputs to FP, to avoid FP software-assist faults.
578c2ecf20Sopenharmony_ci	INT_TO_FP(f8, f8)
588c2ecf20Sopenharmony_ci	INT_TO_FP(f9, f9)
598c2ecf20Sopenharmony_ci	;;
	// frcpa writes the reciprocal approximation to f6 and sets p6.  All
	// refinement steps below are predicated on p6; when p6 is clear they
	// are skipped and the f6 produced by frcpa is converted as-is.
	// NOTE(review): per the IA-64 definition of frcpa that is the case
	// where frcpa itself already delivered the quotient (special
	// operands such as b == 0) - confirm against the architecture manual.
608c2ecf20Sopenharmony_ci	setf.exp f7 = r2		// f7 = 2^-34
618c2ecf20Sopenharmony_ci	frcpa.s1 f6, p6 = f8, f9	// y0 = frcpa(b)
628c2ecf20Sopenharmony_ci	;;
638c2ecf20Sopenharmony_ci(p6)	fmpy.s1 f8 = f8, f6		// q0 = a*y0
648c2ecf20Sopenharmony_ci(p6)	fnma.s1 f6 = f9, f6, f1		// e0 = -b*y0 + 1
658c2ecf20Sopenharmony_ci	;;
	// The FP copy of b in f9 was last read by the fnma in the previous
	// group, so f9 is free to take the integer -b for the final xma.l.
668c2ecf20Sopenharmony_ci#ifdef MODULO
678c2ecf20Sopenharmony_ci	setf.sig f9 = in1		// f9 = -b
688c2ecf20Sopenharmony_ci#endif
	// NOTE(review): the 2^-34 term added to e1 is presumably the
	// correction that makes the truncated q2 the exact quotient; see
	// the application note cited in the file header for the derivation.
698c2ecf20Sopenharmony_ci(p6)	fma.s1 f8 = f6, f8, f8		// q1 = e0*q0 + q0
708c2ecf20Sopenharmony_ci(p6)	fma.s1 f6 = f6, f6, f7		// e1 = e0*e0 + 2^-34
718c2ecf20Sopenharmony_ci	;;
	// 2^-34 in f7 was last read by the e1 fma above, so f7 is reused to
	// hold the integer dividend a (in0 is unchanged since the EXTEND).
728c2ecf20Sopenharmony_ci#ifdef MODULO
738c2ecf20Sopenharmony_ci	setf.sig f7 = in0
748c2ecf20Sopenharmony_ci#endif
758c2ecf20Sopenharmony_ci(p6)	fma.s1 f6 = f6, f8, f8		// q2 = e1*q1 + q1
768c2ecf20Sopenharmony_ci	;;
778c2ecf20Sopenharmony_ci	FP_TO_INT(f6, f6)		// q = trunc(q2)
788c2ecf20Sopenharmony_ci	;;
	// Integer multiply-add on the significands: f6 holds q (from the
	// conversion above), f9 holds -b and f7 holds a (both via setf.sig).
798c2ecf20Sopenharmony_ci#ifdef MODULO
808c2ecf20Sopenharmony_ci	xma.l f6 = f6, f9, f7		// r = q*(-b) + a
818c2ecf20Sopenharmony_ci	;;
828c2ecf20Sopenharmony_ci#endif
838c2ecf20Sopenharmony_ci	getf.sig r8 = f6		// transfer result to result register
848c2ecf20Sopenharmony_ci	br.ret.sptk.many rp
858c2ecf20Sopenharmony_ciEND(NAME)
868c2ecf20Sopenharmony_ciEXPORT_SYMBOL(NAME)
87