/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2000 Hewlett-Packard Co
 * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 32-bit integer division.
 *
 * This code is based on the application note entitled "Divide, Square Root
 * and Remainder Algorithms for the IA-64 Architecture".  This document
 * is available as Intel document number 248725-002 or via the web at
 * http://developer.intel.com/software/opensource/numerics/
 *
 * For more details on the theory behind these algorithms, see "IA-64
 * and Elementary Functions" by Peter Markstein; HP Professional Books
 * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

/*
 * Build-time configuration.  This one source is meant to be assembled
 * several times with different combinations of MODULO and UNSIGNED
 * defined; the macros below select the pieces that differ.
 *
 * OP selects the operation part of the routine name: "mod" when MODULO
 * is defined (the routine returns the remainder), "div" otherwise (the
 * routine returns the quotient).
 */
#ifdef MODULO
# define OP	mod
#else
# define OP	div
#endif

/*
 * SGN            - "u" for the unsigned variant, empty for the signed one;
 *                  becomes part of the routine name.
 * EXTEND         - instruction used to widen each 32-bit input before it is
 *                  moved to an FP register (zxt4 for unsigned, sxt4 for
 *                  signed).
 * INT_TO_FP(a,b) - convert the integer held in FP register b to a
 *                  floating-point value in a.
 * FP_TO_INT(a,b) - convert the floating-point value in b to an integer in
 *                  a, truncating.
 */
#ifdef UNSIGNED
# define SGN	u
# define EXTEND	zxt4
# define INT_TO_FP(a,b)	fcvt.xuf.s1 a=b
# define FP_TO_INT(a,b)	fcvt.fxu.trunc.s1 a=b
#else
# define SGN
# define EXTEND	sxt4
# define INT_TO_FP(a,b)	fcvt.xf a=b
# define FP_TO_INT(a,b)	fcvt.fx.trunc.s1 a=b
#endif
/*
 * Build the routine name from the configuration macros.  PASTE goes
 * through PASTE1 so that SGN and OP are macro-expanded before "##" glues
 * the tokens together.  The result is one of:
 *
 *	__divsi3, __udivsi3, __modsi3, __umodsi3
 */
#define PASTE1(a,b)	a##b
#define PASTE(a,b)	PASTE1(a,b)
#define NAME		PASTE(PASTE(__,SGN),PASTE(OP,si3))

/*
 * NAME(a, b) - 32-bit integer quotient of a by b, or the remainder when
 * MODULO is defined.
 *
 * Inputs:	in0 = a, in1 = b (both are widened in place with EXTEND)
 * Result:	r8
 * Written:	r2, in0, in1, f6, f7, f8, f9, p6
 *
 * Method: both operands are converted to floating point, frcpa supplies an
 * initial reciprocal approximation y0 of b, and the quotient estimate is
 * refined as
 *
 *	q0 = a*y0		e0 = 1 - b*y0
 *	q1 = e0*q0 + q0		e1 = e0*e0 + 2^-34
 *	q2 = e1*q1 + q1
 *
 * after which q2 is truncated to an integer.  For the MODULO variants the
 * remainder is then formed in integer arithmetic as q*(-b) + a.
 *
 * All refinement steps are predicated on p6, which frcpa writes.
 * NOTE(review): per the IA-64 ISA, frcpa clears p6 when f6 already holds
 * the final result for the operands (e.g. special cases) - confirm against
 * the architecture manual before relying on this.
 *
 * The ";;" stops and the placement of the setf.sig instructions between
 * the FP steps are part of the schedule; do not reorder them casually.
 */
GLOBAL_ENTRY(NAME)
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	mov r2 = 0xffdd			// r2 = -34 + 65535 (fp reg format bias)
	EXTEND in0 = in0		// in0 = a
	EXTEND in1 = in1		// in1 = b
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
#ifdef MODULO
	// Negate b now; it is reloaded into f9 below for the final xma.l.
	sub in1 = r0, in1		// in1 = -b
#endif
	;;
	// Convert the inputs to FP, to avoid FP software-assist faults.
	INT_TO_FP(f8, f8)
	INT_TO_FP(f9, f9)
	;;
	setf.exp f7 = r2		// f7 = 2^-34
	frcpa.s1 f6, p6 = f8, f9	// y0 = frcpa(b)
	;;
(p6)	fmpy.s1 f8 = f8, f6		// q0 = a*y0
(p6)	fnma.s1 f6 = f9, f6, f1		// e0 = -b*y0 + 1
	;;
#ifdef MODULO
	// f9 (b as FP) was last read by the fnma above; reuse it for integer -b.
	setf.sig f9 = in1		// f9 = -b
#endif
(p6)	fma.s1 f8 = f6, f8, f8		// q1 = e0*q0 + q0
(p6)	fma.s1 f6 = f6, f6, f7		// e1 = e0*e0 + 2^-34
	;;
#ifdef MODULO
	// f7 (2^-34) was last read by the fma above; reuse it for integer a.
	setf.sig f7 = in0		// f7 = a
#endif
(p6)	fma.s1 f6 = f6, f8, f8		// q2 = e1*q1 + q1
	;;
	FP_TO_INT(f6, f6)		// q = trunc(q2)
	;;
#ifdef MODULO
	xma.l f6 = f6, f9, f7		// r = q*(-b) + a
	;;
#endif
	getf.sig r8 = f6		// transfer result to result register
	br.ret.sptk.many rp
END(NAME)
EXPORT_SYMBOL(NAME)