18c2ecf20Sopenharmony_ci|
28c2ecf20Sopenharmony_ci|	stanh.sa 3.1 12/10/90
38c2ecf20Sopenharmony_ci|
48c2ecf20Sopenharmony_ci|	The entry point sTanh computes the hyperbolic tangent of
58c2ecf20Sopenharmony_ci|	an input argument; sTanhd does the same except for denormalized
68c2ecf20Sopenharmony_ci|	input.
78c2ecf20Sopenharmony_ci|
88c2ecf20Sopenharmony_ci|	Input: Double-extended number X in location pointed to
98c2ecf20Sopenharmony_ci|		by address register a0.
108c2ecf20Sopenharmony_ci|
118c2ecf20Sopenharmony_ci|	Output: The value tanh(X) returned in floating-point register Fp0.
128c2ecf20Sopenharmony_ci|
138c2ecf20Sopenharmony_ci|	Accuracy and Monotonicity: The returned result is within 3 ulps in
148c2ecf20Sopenharmony_ci|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
158c2ecf20Sopenharmony_ci|		result is subsequently rounded to double precision. The
168c2ecf20Sopenharmony_ci|		result is provably monotonic in double precision.
178c2ecf20Sopenharmony_ci|
188c2ecf20Sopenharmony_ci|	Speed: The program stanh takes approximately 270 cycles.
198c2ecf20Sopenharmony_ci|
208c2ecf20Sopenharmony_ci|	Algorithm:
218c2ecf20Sopenharmony_ci|
228c2ecf20Sopenharmony_ci|	TANH
238c2ecf20Sopenharmony_ci|	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
248c2ecf20Sopenharmony_ci|
258c2ecf20Sopenharmony_ci|	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
268c2ecf20Sopenharmony_ci|		sgn := sign(X), Y := 2|X|, Z := expm1(Y), and
278c2ecf20Sopenharmony_ci|		tanh(X) = sgn*( Z/(2+Z) ).
288c2ecf20Sopenharmony_ci|		Exit.
298c2ecf20Sopenharmony_ci|
308c2ecf20Sopenharmony_ci|	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
318c2ecf20Sopenharmony_ci|		go to 7.
328c2ecf20Sopenharmony_ci|
338c2ecf20Sopenharmony_ci|	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
348c2ecf20Sopenharmony_ci|
358c2ecf20Sopenharmony_ci|	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
368c2ecf20Sopenharmony_ci|		sgn := sign(X), Y := 2|X|, Z := exp(Y),
378c2ecf20Sopenharmony_ci|		tanh(X) = sgn - [ sgn*2/(1+Z) ].
388c2ecf20Sopenharmony_ci|		Exit.
398c2ecf20Sopenharmony_ci|
408c2ecf20Sopenharmony_ci|	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
418c2ecf20Sopenharmony_ci|		calculate Tanh(X) by
428c2ecf20Sopenharmony_ci|		sgn := sign(X), Tiny := 2**(-126),
438c2ecf20Sopenharmony_ci|		tanh(X) := sgn - sgn*Tiny.
448c2ecf20Sopenharmony_ci|		Exit.
458c2ecf20Sopenharmony_ci|
468c2ecf20Sopenharmony_ci|	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.
478c2ecf20Sopenharmony_ci|
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci|		Copyright (C) Motorola, Inc. 1990
508c2ecf20Sopenharmony_ci|			All Rights Reserved
518c2ecf20Sopenharmony_ci|
528c2ecf20Sopenharmony_ci|       For details on the license for this file, please see the
538c2ecf20Sopenharmony_ci|       file, README, in this same directory.
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci|STANH	idnt	2,1 | Motorola 040 Floating Point Software Package
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	|section	8
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci#include "fpsp.h"
608c2ecf20Sopenharmony_ci
| Scratch slots used by stanh; all are addressed as offsets from %a6.
| FP_SCR5, FP_SCR6 and L_SCR3 are not defined in this file (presumably
| they come from fpsp.h -- confirm).
|
|   X      working copy of the argument; later rewritten in place to 2|X|
|   XDCARE word at X+2; zeroed in TANHSM before X is reloaded
|   XFRAC  X+4; not referenced by the code in this file
618c2ecf20Sopenharmony_ci	.set	X,FP_SCR5
628c2ecf20Sopenharmony_ci	.set	XDCARE,X+2
638c2ecf20Sopenharmony_ci	.set	XFRAC,X+4
648c2ecf20Sopenharmony_ci
|   SGN    longword that is masked down to the sign bit of X
658c2ecf20Sopenharmony_ci	.set	SGN,L_SCR3
668c2ecf20Sopenharmony_ci
|   V      holds Z+2 with sign(X) folded in; divisor on the usual path
678c2ecf20Sopenharmony_ci	.set	V,FP_SCR6
688c2ecf20Sopenharmony_ci
| Lower/upper bound pair consumed by the cmp2l in stanh.  Each bound uses
| the same 32-bit key layout stanh builds in %d0: upper word = exponent
| word of the operand, lower word = the word at offset 4 of the operand.
698c2ecf20Sopenharmony_ciBOUNDS1:	.long 0x3FD78000,0x3FFFDDCE | ... 2^(-40), (5/2)LOG2
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	|xref	t_frcinx
728c2ecf20Sopenharmony_ci	|xref	t_extdnrm
738c2ecf20Sopenharmony_ci	|xref	setox
748c2ecf20Sopenharmony_ci	|xref	setoxm1
758c2ecf20Sopenharmony_ci
| stanhd -- entry point for denormalized input.
| No computation is done here: control is handed straight to t_extdnrm,
| which is defined outside this file.
| NOTE(review): presumably t_extdnrm returns the operand itself with the
| appropriate exception handling -- confirm against its definition.
768c2ecf20Sopenharmony_ci	.global	stanhd
778c2ecf20Sopenharmony_cistanhd:
788c2ecf20Sopenharmony_ci|--TANH(X) = X FOR DENORMALIZED X
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	bra		t_extdnrm
818c2ecf20Sopenharmony_ci
| stanh -- hyperbolic tangent of the extended-precision value at (a0).
|   In:   a0 -> X.
|         d1 is written to FPCR immediately before the final arithmetic
|         instruction on every path; presumably it holds the caller's
|         FPCR image -- confirm against the dispatcher.
|   Out:  fp0 = tanh(X).  Every path leaves via "bra t_frcinx".
|   Uses: d0, fp1, and the X / SGN / V scratch slots off a6.  On the two
|         paths that call setoxm1/setox the operand at (a0) is
|         overwritten with Y = 2|X|.
828c2ecf20Sopenharmony_ci	.global	stanh
838c2ecf20Sopenharmony_cistanh:
848c2ecf20Sopenharmony_ci	fmovex		(%a0),%fp0	| ...LOAD INPUT
858c2ecf20Sopenharmony_ci
| Keep a scratch copy of X, then build a 32-bit magnitude key in d0:
| upper word = first word of X (sign/exponent), lower word = the word at
| offset 4 of X.  The key is also written over the first longword of the
| scratch copy, which is why TANHSM later has to clear XDCARE.
868c2ecf20Sopenharmony_ci	fmovex		%fp0,X(%a6)
878c2ecf20Sopenharmony_ci	movel		(%a0),%d0
888c2ecf20Sopenharmony_ci	movew		4(%a0),%d0
898c2ecf20Sopenharmony_ci	movel		%d0,X(%a6)
| Drop the sign bit so d0 orders by |X|.  cmp2l sets carry when d0 lies
| outside the inclusive range given by the two longwords at BOUNDS1, so
| the bcs below leaves the usual path for very small and for large |X|.
908c2ecf20Sopenharmony_ci	andl		#0x7FFFFFFF,%d0
918c2ecf20Sopenharmony_ci	cmp2l		BOUNDS1(%pc),%d0	| ...2**(-40) < |X| < (5/2)LOG2 ?
928c2ecf20Sopenharmony_ci	bcss		TANHBORS
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci|--THIS IS THE USUAL CASE
958c2ecf20Sopenharmony_ci|--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
968c2ecf20Sopenharmony_ci
| SGN := sign bit of X.  The first longword of the scratch copy becomes
| (exponent + 1) with the sign bit and low word cleared; the remaining
| words of X are untouched, so the slot now reads back as Y = 2|X|.
978c2ecf20Sopenharmony_ci	movel		X(%a6),%d0
988c2ecf20Sopenharmony_ci	movel		%d0,SGN(%a6)
998c2ecf20Sopenharmony_ci	andl		#0x7FFF0000,%d0
1008c2ecf20Sopenharmony_ci	addl		#0x00010000,%d0	| ...EXPONENT OF 2|X|
1018c2ecf20Sopenharmony_ci	movel		%d0,X(%a6)
1028c2ecf20Sopenharmony_ci	andl		#0x80000000,SGN(%a6)
1038c2ecf20Sopenharmony_ci	fmovex		X(%a6),%fp0		| ...FP0 IS Y = 2|X|
1048c2ecf20Sopenharmony_ci
| Call setoxm1 with Y stored at (a0) and d1 = 0; the incoming d1 is
| parked on the stack across the call and restored afterwards.
| NOTE(review): d1 = 0 presumably makes setoxm1 run with default
| rounding/precision -- confirm against setoxm1's definition.
1058c2ecf20Sopenharmony_ci	movel		%d1,-(%a7)
1068c2ecf20Sopenharmony_ci	clrl		%d1
1078c2ecf20Sopenharmony_ci	fmovemx	%fp0-%fp0,(%a0)
1088c2ecf20Sopenharmony_ci	bsr		setoxm1		| ...FP0 IS Z = EXPM1(Y)
1098c2ecf20Sopenharmony_ci	movel		(%a7)+,%d1
1108c2ecf20Sopenharmony_ci
| fp1 = Z + 2 (0x40000000 is 2.0 in single precision).  V receives Z+2
| and its top bit is XORed with SGN, so V = sign(X) * (Z+2).
1118c2ecf20Sopenharmony_ci	fmovex		%fp0,%fp1
1128c2ecf20Sopenharmony_ci	fadds		#0x40000000,%fp1	| ...Z+2
1138c2ecf20Sopenharmony_ci	movel		SGN(%a6),%d0
1148c2ecf20Sopenharmony_ci	fmovex		%fp1,V(%a6)
1158c2ecf20Sopenharmony_ci	eorl		%d0,V(%a6)
1168c2ecf20Sopenharmony_ci
| Final operation runs under the FPCR value taken from d1:
| fp0 = Z / V = sign(X) * Z / (Z+2).
1178c2ecf20Sopenharmony_ci	fmovel		%d1,%FPCR		|restore users exceptions
1188c2ecf20Sopenharmony_ci	fdivx		V(%a6),%fp0
1198c2ecf20Sopenharmony_ci	bra		t_frcinx
1208c2ecf20Sopenharmony_ci
| TANHBORS -- reached from stanh when the |X| key in d0 is outside the
| BOUNDS1 range.  Dispatch on the same key:
|   key < 0x3FFF8000 (|X| < 1, so it must be the small case) -> TANHSM
|   key > 0x40048AA1 (the 50 LOG2 threshold)                 -> TANHHUGE
|   otherwise fall through to the exp-based formula below.
1218c2ecf20Sopenharmony_ciTANHBORS:
1228c2ecf20Sopenharmony_ci	cmpl		#0x3FFF8000,%d0
1238c2ecf20Sopenharmony_ci	blt		TANHSM
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	cmpl		#0x40048AA1,%d0
1268c2ecf20Sopenharmony_ci	bgt		TANHHUGE
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci|-- (5/2) LOG2 < |X| < 50 LOG2,
1298c2ecf20Sopenharmony_ci|--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
1308c2ecf20Sopenharmony_ci|--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].
1318c2ecf20Sopenharmony_ci
| Same preparation as the usual path in stanh: SGN := sign bit of X and
| the scratch copy of X is rewritten in place to Y = 2|X| by bumping the
| exponent field and clearing the sign bit and low word.
1328c2ecf20Sopenharmony_ci	movel		X(%a6),%d0
1338c2ecf20Sopenharmony_ci	movel		%d0,SGN(%a6)
1348c2ecf20Sopenharmony_ci	andl		#0x7FFF0000,%d0
1358c2ecf20Sopenharmony_ci	addl		#0x00010000,%d0	| ...EXPO OF 2|X|
1368c2ecf20Sopenharmony_ci	movel		%d0,X(%a6)		| ...Y = 2|X|
1378c2ecf20Sopenharmony_ci	andl		#0x80000000,SGN(%a6)
| NOTE(review): d0 is reloaded from SGN right after the setox call, so
| this load looks redundant unless setox consumes d0 -- confirm.
1388c2ecf20Sopenharmony_ci	movel		SGN(%a6),%d0
1398c2ecf20Sopenharmony_ci	fmovex		X(%a6),%fp0		| ...Y = 2|X|
1408c2ecf20Sopenharmony_ci
| Call setox with Y stored at (a0) and d1 = 0; the incoming d1 is parked
| on the stack across the call and restored afterwards.
1418c2ecf20Sopenharmony_ci	movel		%d1,-(%a7)
1428c2ecf20Sopenharmony_ci	clrl		%d1
1438c2ecf20Sopenharmony_ci	fmovemx	%fp0-%fp0,(%a0)
1448c2ecf20Sopenharmony_ci	bsr		setox		| ...FP0 IS EXP(Y)
1458c2ecf20Sopenharmony_ci	movel		(%a7)+,%d1
1468c2ecf20Sopenharmony_ci	movel		SGN(%a6),%d0
1478c2ecf20Sopenharmony_ci	fadds		#0x3F800000,%fp0	| ...EXP(Y)+1
1488c2ecf20Sopenharmony_ci
| d0 is 0 or 0x80000000 here; XOR with 0xC0000000 yields the single
| precision pattern for -2.0 (X positive) or +2.0 (X negative).
1498c2ecf20Sopenharmony_ci	eorl		#0xC0000000,%d0	| ...-SIGN(X)*2
1508c2ecf20Sopenharmony_ci	fmoves		%d0,%fp1		| ...-SIGN(X)*2 IN SGL FMT
1518c2ecf20Sopenharmony_ci	fdivx		%fp0,%fp1		| ...-SIGN(X)2 / [EXP(Y)+1 ]
1528c2ecf20Sopenharmony_ci
| OR-ing SGN with 0x3F800000 gives +1.0 or -1.0 in single precision.
1538c2ecf20Sopenharmony_ci	movel		SGN(%a6),%d0
1548c2ecf20Sopenharmony_ci	orl		#0x3F800000,%d0	| ...SGN
1558c2ecf20Sopenharmony_ci	fmoves		%d0,%fp0		| ...SGN IN SGL FMT
1568c2ecf20Sopenharmony_ci
| Final add runs under the FPCR value taken from d1:
| fp0 = SGN + ( -SGN*2 / [EXP(Y)+1] ).
1578c2ecf20Sopenharmony_ci	fmovel		%d1,%FPCR		|restore users exceptions
1588c2ecf20Sopenharmony_ci	faddx		%fp1,%fp0
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	bra		t_frcinx
1618c2ecf20Sopenharmony_ci
| TANHSM -- |X| is below the lower bound: the result is X itself.
| stanh wrote its 32-bit magnitude key over the first longword of the
| scratch copy of X, so the word at XDCARE (X+2) no longer holds its
| original contents.  Clear it, then reload X under the FPCR value taken
| from d1 so that this last move is the instruction that can raise
| exceptions.
1628c2ecf20Sopenharmony_ciTANHSM:
1638c2ecf20Sopenharmony_ci	movew		#0x0000,XDCARE(%a6)
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	fmovel		%d1,%FPCR		|restore users exceptions
1668c2ecf20Sopenharmony_ci	fmovex		X(%a6),%fp0		|last inst - possible exception set
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	bra		t_frcinx
1698c2ecf20Sopenharmony_ci
| TANHHUGE -- key above 0x40048AA1: tanh(X) is +-1 to working precision.
| Per the file header the result is formed as sgn - sgn*Tiny with
| Tiny = 2**(-126), so the final add (done under the FPCR value taken
| from d1) produces the rounded result rather than an exact constant.
1708c2ecf20Sopenharmony_ciTANHHUGE:
1718c2ecf20Sopenharmony_ci|---RETURN SGN(X) - SGN(X)EPS
| d0 = sign bit of X OR 0x3F800000, i.e. +1.0 or -1.0 in single precision.
1728c2ecf20Sopenharmony_ci	movel		X(%a6),%d0
1738c2ecf20Sopenharmony_ci	andl		#0x80000000,%d0
1748c2ecf20Sopenharmony_ci	orl		#0x3F800000,%d0
1758c2ecf20Sopenharmony_ci	fmoves		%d0,%fp0
| Reduce d0 back to the sign bit, then XOR with 0x80800000: the sign is
| inverted and the exponent field becomes 1, giving -sign(X) * 2**(-126)
| as a single-precision operand.
1768c2ecf20Sopenharmony_ci	andl		#0x80000000,%d0
1778c2ecf20Sopenharmony_ci	eorl		#0x80800000,%d0	| ...-SIGN(X)*EPS
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	fmovel		%d1,%FPCR		|restore users exceptions
1808c2ecf20Sopenharmony_ci	fadds		%d0,%fp0
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	bra		t_frcinx
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	|end
185