1 |
2 |	stwotox.sa 3.1 12/10/90
3 |
4 |	stwotox  --- 2**X
5 |	stwotoxd --- 2**X for denormalized X
6 |	stentox  --- 10**X
7 |	stentoxd --- 10**X for denormalized X
8 |
9 |	Input: Double-extended number X in location pointed to
10 |		by address register a0.
11 |
12 |	Output: The function values are returned in Fp0.
13 |
14 |	Accuracy and Monotonicity: The returned result is within 2 ulps in
15 |		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16 |		result is subsequently rounded to double precision. The
17 |		result is provably monotonic in double precision.
18 |
19 |	Speed: The program stwotox takes approximately 190 cycles and the
20 |		program stentox takes approximately 200 cycles.
21 |
22 |	Algorithm:
23 |
24 |	twotox
25 |	1. If |X| > 16480, go to ExpBig.
26 |
27 |	2. If |X| < 2**(-70), go to ExpSm.
28 |
29 |	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30 |		decompose N as
31 |		 N = 64(M + M') + j,  j = 0,1,2,...,63.
32 |
33 |	4. Overwrite r := r * log2. Then
34 |		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35 |		Go to expr to compute that expression.
36 |
37 |	tentox
38 |	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
39 |
40 |	2. If |X| < 2**(-70), go to ExpSm.
41 |
42 |	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43 |		N := round-to-int(y). Decompose N as
44 |		 N = 64(M + M') + j,  j = 0,1,2,...,63.
45 |
46 |	4. Define r as
47 |		r := ((X - N*L1)-N*L2) * L10
48 |		where L1, L2 are the leading and trailing parts of log_10(2)/64
49 |		and L10 is the natural log of 10. Then
50 |		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51 |		Go to expr to compute that expression.
52 |
53 |	expr
54 |	1. Fetch 2**(j/64) from table as Fact1 and Fact2.
55 |
56 |	2. Overwrite Fact1 and Fact2 by
57 |		Fact1 := 2**(M) * Fact1
58 |		Fact2 := 2**(M) * Fact2
59 |		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
60 |
61 |	3. Calculate P where 1 + P approximates exp(r):
62 |		P = r + r*r*(A1+r*(A2+...+r*A5)).
63 |
64 |	4. Let AdjFact := 2**(M'). Return
65 |		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66 |		Exit.
67 |
68 |	ExpBig
69 |	1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70 |		underflow by Tiny * Tiny.
71 |
72 |	ExpSm
73 |	1. Return 1 + X.
74 |
75 
76 |		Copyright (C) Motorola, Inc. 1990
77 |			All Rights Reserved
78 |
79 |       For details on the license for this file, please see the
80 |       file, README, in this same directory.
81 
82 |STWOTOX	idnt	2,1 | Motorola 040 Floating Point Software Package
83 
84 	|section	8
85 
86 #include "fpsp.h"
87 
88 BOUNDS1:	.long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
89 BOUNDS2:	.long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
90 
91 L2TEN64:	.long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
92 L10TWO1:	.long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
93 
94 L10TWO2:	.long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
95 
96 LOG10:	.long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
97 
98 LOG2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
99 
100 EXPA5:	.long 0x3F56C16D,0x6F7BD0B2
101 EXPA4:	.long 0x3F811112,0x302C712C
102 EXPA3:	.long 0x3FA55555,0x55554CC1
103 EXPA2:	.long 0x3FC55555,0x55554A54
104 EXPA1:	.long 0x3FE00000,0x00000000,0x00000000,0x00000000
105 
106 HUGE:	.long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
107 TINY:	.long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108 
109 EXPTBL:
110 	.long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
111 	.long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
112 	.long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
113 	.long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
114 	.long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
115 	.long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
116 	.long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
117 	.long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
118 	.long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
119 	.long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
120 	.long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
121 	.long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
122 	.long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
123 	.long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
124 	.long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
125 	.long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
126 	.long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
127 	.long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
128 	.long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
129 	.long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
130 	.long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
131 	.long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
132 	.long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
133 	.long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
134 	.long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
135 	.long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
136 	.long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
137 	.long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
138 	.long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
139 	.long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
140 	.long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
141 	.long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
142 	.long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
143 	.long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
144 	.long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
145 	.long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
146 	.long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
147 	.long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
148 	.long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
149 	.long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
150 	.long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
151 	.long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
152 	.long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
153 	.long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
154 	.long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
155 	.long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
156 	.long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
157 	.long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
158 	.long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
159 	.long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
160 	.long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
161 	.long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
162 	.long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
163 	.long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
164 	.long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
165 	.long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
166 	.long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
167 	.long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
168 	.long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
169 	.long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
170 	.long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
171 	.long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
172 	.long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
173 	.long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
174 
175 	.set	N,L_SCR1
176 
177 	.set	X,FP_SCR1
178 	.set	XDCARE,X+2
179 	.set	XFRAC,X+4
180 
181 	.set	ADJFACT,FP_SCR2
182 
183 	.set	FACT1,FP_SCR3
184 	.set	FACT1HI,FACT1+4
185 	.set	FACT1LOW,FACT1+8
186 
187 	.set	FACT2,FP_SCR4
188 	.set	FACT2HI,FACT2+4
189 	.set	FACT2LOW,FACT2+8
190 
191 	| xref	t_unfl
192 	|xref	t_ovfl
193 	|xref	t_frcinx
194 
195 	.global	stwotoxd
196 stwotoxd:
197 |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
198 
199 	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
200 	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
201 	movel		(%a0),%d0
202 	orl		#0x00800001,%d0
203 	fadds		%d0,%fp0
204 	bra		t_frcinx
205 
206 	.global	stwotox
207 stwotox:
208 |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
209 	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
210 
211 	movel		(%a0),%d0
212 	movew		4(%a0),%d0
213 	fmovex		%fp0,X(%a6)
214 	andil		#0x7FFFFFFF,%d0
215 
216 	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
217 	bges		TWOOK1
218 	bra		EXPBORS
219 
220 TWOOK1:
221 	cmpil		#0x400D80C0,%d0		| ...|X| > 16480?
222 	bles		TWOMAIN
223 	bra		EXPBORS
224 
225 
226 TWOMAIN:
227 |--USUAL CASE, 2^(-70) <= |X| <= 16480
228 
229 	fmovex		%fp0,%fp1
230 	fmuls		#0x42800000,%fp1  | ...64 * X
231 
232 	fmovel		%fp1,N(%a6)		| ...N = ROUND-TO-INT(64 X)
233 	movel		%d2,-(%sp)
234 	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
235 	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
236 	movel		N(%a6),%d0
237 	movel		%d0,%d2
238 	andil		#0x3F,%d0		| ...D0 IS J
239 	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
240 	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
241 	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
242 	movel		%d2,%d0
243 	asrl		#1,%d0		| ...D0 IS M
244 	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
245 	addil		#0x3FFF,%d2
246 	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
247 	movel		(%sp)+,%d2
248 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
249 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
250 |--ADJFACT = 2^(M').
251 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
252 
253 	fmuls		#0x3C800000,%fp1  | ...(1/64)*N
254 	movel		(%a1)+,FACT1(%a6)
255 	movel		(%a1)+,FACT1HI(%a6)
256 	movel		(%a1)+,FACT1LOW(%a6)
257 	movew		(%a1)+,FACT2(%a6)
258 	clrw		FACT2+2(%a6)
259 
260 	fsubx		%fp1,%fp0		| ...X - (1/64)*INT(64 X)
261 
262 	movew		(%a1)+,FACT2HI(%a6)
263 	clrw		FACT2HI+2(%a6)
264 	clrl		FACT2LOW(%a6)
265 	addw		%d0,FACT1(%a6)
266 
267 	fmulx		LOG2,%fp0	| ...FP0 IS R
268 	addw		%d0,FACT2(%a6)
269 
270 	bra		expr
271 
272 EXPBORS:
273 |--FPCR, D0 SAVED
274 	cmpil		#0x3FFF8000,%d0
275 	bgts		EXPBIG
276 
277 EXPSM:
278 |--|X| IS SMALL, RETURN 1 + X
279 
280 	fmovel		%d1,%FPCR		|restore users exceptions
281 	fadds		#0x3F800000,%fp0  | ...RETURN 1 + X
282 
283 	bra		t_frcinx
284 
285 EXPBIG:
286 |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
287 |--REGISTERS SAVE SO FAR ARE FPCR AND  D0
288 	movel		X(%a6),%d0
289 	cmpil		#0,%d0
290 	blts		EXPNEG
291 
292 	bclrb		#7,(%a0)		|t_ovfl expects positive value
293 	bra		t_ovfl
294 
295 EXPNEG:
296 	bclrb		#7,(%a0)		|t_unfl expects positive value
297 	bra		t_unfl
298 
299 	.global	stentoxd
300 stentoxd:
301 |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
302 
303 	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
304 	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
305 	movel		(%a0),%d0
306 	orl		#0x00800001,%d0
307 	fadds		%d0,%fp0
308 	bra		t_frcinx
309 
310 	.global	stentox
311 stentox:
312 |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
313 	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
314 
315 	movel		(%a0),%d0
316 	movew		4(%a0),%d0
317 	fmovex		%fp0,X(%a6)
318 	andil		#0x7FFFFFFF,%d0
319 
320 	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
321 	bges		TENOK1
322 	bra		EXPBORS
323 
324 TENOK1:
325 	cmpil		#0x400B9B07,%d0		| ...|X| <= 16480*log2/log10 ?
326 	bles		TENMAIN
327 	bra		EXPBORS
328 
329 TENMAIN:
330 |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
331 
332 	fmovex		%fp0,%fp1
333 	fmuld		L2TEN64,%fp1	| ...X*64*LOG10/LOG2
334 
335 	fmovel		%fp1,N(%a6)		| ...N=INT(X*64*LOG10/LOG2)
336 	movel		%d2,-(%sp)
337 	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
338 	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
339 	movel		N(%a6),%d0
340 	movel		%d0,%d2
341 	andil		#0x3F,%d0		| ...D0 IS J
342 	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
343 	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
344 	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
345 	movel		%d2,%d0
346 	asrl		#1,%d0		| ...D0 IS M
347 	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
348 	addil		#0x3FFF,%d2
349 	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
350 	movel		(%sp)+,%d2
351 
352 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
353 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
354 |--ADJFACT = 2^(M').
355 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
356 
357 	fmovex		%fp1,%fp2
358 
359 	fmuld		L10TWO1,%fp1	| ...N*(LOG2/64LOG10)_LEAD
360 	movel		(%a1)+,FACT1(%a6)
361 
362 	fmulx		L10TWO2,%fp2	| ...N*(LOG2/64LOG10)_TRAIL
363 
364 	movel		(%a1)+,FACT1HI(%a6)
365 	movel		(%a1)+,FACT1LOW(%a6)
366 	fsubx		%fp1,%fp0		| ...X - N L_LEAD
367 	movew		(%a1)+,FACT2(%a6)
368 
369 	fsubx		%fp2,%fp0		| ...X - N L_TRAIL
370 
371 	clrw		FACT2+2(%a6)
372 	movew		(%a1)+,FACT2HI(%a6)
373 	clrw		FACT2HI+2(%a6)
374 	clrl		FACT2LOW(%a6)
375 
376 	fmulx		LOG10,%fp0	| ...FP0 IS R
377 
378 	addw		%d0,FACT1(%a6)
379 	addw		%d0,FACT2(%a6)
380 
381 expr:
382 |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
383 |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
384 |--FP0 IS R. THE FOLLOWING CODE COMPUTES
385 |--	2**(M'+M) * 2**(J/64) * EXP(R)
386 
387 	fmovex		%fp0,%fp1
388 	fmulx		%fp1,%fp1		| ...FP1 IS S = R*R
389 
390 	fmoved		EXPA5,%fp2	| ...FP2 IS A5
391 	fmoved		EXPA4,%fp3	| ...FP3 IS A4
392 
393 	fmulx		%fp1,%fp2		| ...FP2 IS S*A5
394 	fmulx		%fp1,%fp3		| ...FP3 IS S*A4
395 
396 	faddd		EXPA3,%fp2	| ...FP2 IS A3+S*A5
397 	faddd		EXPA2,%fp3	| ...FP3 IS A2+S*A4
398 
399 	fmulx		%fp1,%fp2		| ...FP2 IS S*(A3+S*A5)
400 	fmulx		%fp1,%fp3		| ...FP3 IS S*(A2+S*A4)
401 
402 	faddd		EXPA1,%fp2	| ...FP2 IS A1+S*(A3+S*A5)
403 	fmulx		%fp0,%fp3		| ...FP3 IS R*S*(A2+S*A4)
404 
405 	fmulx		%fp1,%fp2		| ...FP2 IS S*(A1+S*(A3+S*A5))
406 	faddx		%fp3,%fp0		| ...FP0 IS R+R*S*(A2+S*A4)
407 
408 	faddx		%fp2,%fp0		| ...FP0 IS EXP(R) - 1
409 
410 
411 |--FINAL RECONSTRUCTION PROCESS
412 |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
413 
414 	fmulx		FACT1(%a6),%fp0
415 	faddx		FACT2(%a6),%fp0
416 	faddx		FACT1(%a6),%fp0
417 
418 	fmovel		%d1,%FPCR		|restore users exceptions
419 	clrw		ADJFACT+2(%a6)
420 	movel		#0x80000000,ADJFACT+4(%a6)
421 	clrl		ADJFACT+8(%a6)
422 	fmulx		ADJFACT(%a6),%fp0	| ...FINAL ADJUSTMENT
423 
424 	bra		t_frcinx
425 
426 	|end
427