1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3 M68000 Hi-Performance Microprocessor Division
4 M68060 Software Package
5 Production Release P1.00 -- October 10, 1994
6 
7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
8 
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10 To the maximum extent permitted by applicable law,
11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 and any warranty against infringement with regard to the SOFTWARE
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15 
16 To the maximum extent permitted by applicable law,
17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22 
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
26 No licenses are granted by implication, estoppel or otherwise under any patents
27 or trademarks of Motorola, Inc.
28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29 # freal.s:
30 #	This file is appended to the top of the 060FPSP package
31 # and contains the entry points into the package. The user, in
32 # effect, branches to one of the branch table entries located
33 # after _060FPSP_TABLE.
34 #	Also, subroutine stubs exist in this file (_fpsp_done for
35 # example) that are referenced by the FPSP package itself in order
36 # to call a given routine. The stub routine actually performs the
37 # callout. The FPSP code does a "bsr" to the stub routine. This
38 # extra layer of hierarchy adds a slight performance penalty but
39 # it makes the FPSP code easier to read and more maintainable.
40 #
41 
42 set	_off_bsun,	0x00		# _off_*: byte offsets from (_060FPSP_TABLE-0x80); this slot is read by _real_bsun
43 set	_off_snan,	0x04		# slot read by _real_snan
44 set	_off_operr,	0x08		# slot read by _real_operr
45 set	_off_ovfl,	0x0c		# slot read by _real_ovfl
46 set	_off_unfl,	0x10		# slot read by _real_unfl
47 set	_off_dz,	0x14		# slot read by _real_dz
48 set	_off_inex,	0x18		# slot read by _real_inex
49 set	_off_fline,	0x1c		# slot read by _real_fline
50 set	_off_fpu_dis,	0x20		# slot read by _real_fpu_disabled
51 set	_off_trap,	0x24		# slot read by _real_trap
52 set	_off_trace,	0x28		# slot read by _real_trace
53 set	_off_access,	0x2c		# slot read by _real_access
54 set	_off_done,	0x30		# slot read by _fpsp_done
55 # offsets 0x40 and up are read by the _imem_*/_dmem_* memory-access stubs
56 set	_off_imr,	0x40		# slot read by _imem_read
57 set	_off_dmr,	0x44		# slot read by _dmem_read
58 set	_off_dmw,	0x48		# slot read by _dmem_write
59 set	_off_irw,	0x4c		# slot read by _imem_read_word
60 set	_off_irl,	0x50		# slot read by _imem_read_long
61 set	_off_drb,	0x54		# slot read by _dmem_read_byte
62 set	_off_drw,	0x58		# slot read by _dmem_read_word
63 set	_off_drl,	0x5c		# slot read by _dmem_read_long
64 set	_off_dwb,	0x60		# slot read by _dmem_write_byte
65 set	_off_dww,	0x64		# slot read by _dmem_write_word
66 set	_off_dwl,	0x68		# slot read by _dmem_write_long
67 
68 _060FPSP_TABLE:
69 # The callout stubs in this file read their offsets relative to (_060FPSP_TABLE-0x80), i.e. 0x80 bytes before this label.
70 ###############################################################
71 # Nine entries follow; each is a bra.l to a handler followed by a zero short.
72 # Here's the table of ENTRY POINTS for those linking the package.
73 	bra.l		_fpsp_snan
74 	short		0x0000
75 	bra.l		_fpsp_operr
76 	short		0x0000
77 	bra.l		_fpsp_ovfl
78 	short		0x0000
79 	bra.l		_fpsp_unfl
80 	short		0x0000
81 	bra.l		_fpsp_dz
82 	short		0x0000
83 	bra.l		_fpsp_inex
84 	short		0x0000
85 	bra.l		_fpsp_fline
86 	short		0x0000
87 	bra.l		_fpsp_unsupp
88 	short		0x0000
89 	bra.l		_fpsp_effadd
90 	short		0x0000
91 # reserve 56 more bytes; assuming each bra.l+short slot assembles to 8 bytes, 9*8 + 56 = 0x80 -- TODO confirm
92 	space		56
93 
94 ###############################################################
95 	global		_fpsp_done		# callout stub: jump to the routine whose offset is stored in the _off_done slot
96 _fpsp_done:
97 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
98 	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0	# d0 = longword stored in the _off_done slot
99 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
100 	mov.l		0x4(%sp),%d0		# reload the saved d0
101 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
102 
103 	global		_real_ovfl		# callout stub: jump to the routine whose offset is stored in the _off_ovfl slot
104 _real_ovfl:
105 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
106 	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0	# d0 = longword stored in the _off_ovfl slot
107 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
108 	mov.l		0x4(%sp),%d0		# reload the saved d0
109 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
110 
111 	global		_real_unfl		# callout stub: jump to the routine whose offset is stored in the _off_unfl slot
112 _real_unfl:
113 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
114 	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0	# d0 = longword stored in the _off_unfl slot
115 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
116 	mov.l		0x4(%sp),%d0		# reload the saved d0
117 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
118 
119 	global		_real_inex		# callout stub: jump to the routine whose offset is stored in the _off_inex slot
120 _real_inex:
121 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
122 	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0	# d0 = longword stored in the _off_inex slot
123 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
124 	mov.l		0x4(%sp),%d0		# reload the saved d0
125 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
126 
127 	global		_real_bsun		# callout stub: jump to the routine whose offset is stored in the _off_bsun slot
128 _real_bsun:
129 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
130 	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0	# d0 = longword stored in the _off_bsun slot
131 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
132 	mov.l		0x4(%sp),%d0		# reload the saved d0
133 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
134 
135 	global		_real_operr		# callout stub: jump to the routine whose offset is stored in the _off_operr slot
136 _real_operr:
137 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
138 	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0	# d0 = longword stored in the _off_operr slot
139 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
140 	mov.l		0x4(%sp),%d0		# reload the saved d0
141 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
142 
143 	global		_real_snan		# callout stub: jump to the routine whose offset is stored in the _off_snan slot
144 _real_snan:
145 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
146 	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0	# d0 = longword stored in the _off_snan slot
147 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
148 	mov.l		0x4(%sp),%d0		# reload the saved d0
149 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
150 
151 	global		_real_dz		# callout stub: jump to the routine whose offset is stored in the _off_dz slot
152 _real_dz:
153 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
154 	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0	# d0 = longword stored in the _off_dz slot
155 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
156 	mov.l		0x4(%sp),%d0		# reload the saved d0
157 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
158 
159 	global		_real_fline		# callout stub: jump to the routine whose offset is stored in the _off_fline slot
160 _real_fline:
161 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
162 	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0	# d0 = longword stored in the _off_fline slot
163 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
164 	mov.l		0x4(%sp),%d0		# reload the saved d0
165 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
166 
167 	global		_real_fpu_disabled	# callout stub: jump to the routine whose offset is stored in the _off_fpu_dis slot
168 _real_fpu_disabled:
169 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
170 	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0	# d0 = longword stored in the _off_fpu_dis slot
171 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
172 	mov.l		0x4(%sp),%d0		# reload the saved d0
173 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
174 
175 	global		_real_trap		# callout stub: jump to the routine whose offset is stored in the _off_trap slot
176 _real_trap:
177 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
178 	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0	# d0 = longword stored in the _off_trap slot
179 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
180 	mov.l		0x4(%sp),%d0		# reload the saved d0
181 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
182 
183 	global		_real_trace		# callout stub: jump to the routine whose offset is stored in the _off_trace slot
184 _real_trace:
185 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
186 	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0	# d0 = longword stored in the _off_trace slot
187 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
188 	mov.l		0x4(%sp),%d0		# reload the saved d0
189 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
190 
191 	global		_real_access		# callout stub: jump to the routine whose offset is stored in the _off_access slot
192 _real_access:
193 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
194 	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0	# d0 = longword stored in the _off_access slot
195 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
196 	mov.l		0x4(%sp),%d0		# reload the saved d0
197 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
198 
199 #######################################
200 
201 	global		_imem_read		# callout stub: jump to the routine whose offset is stored in the _off_imr slot
202 _imem_read:
203 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
204 	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0	# d0 = longword stored in the _off_imr slot
205 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
206 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
207 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
208 
209 	global		_dmem_read		# callout stub: jump to the routine whose offset is stored in the _off_dmr slot
210 _dmem_read:
211 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
212 	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0	# d0 = longword stored in the _off_dmr slot
213 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
214 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
215 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
216 
217 	global		_dmem_write		# callout stub: jump to the routine whose offset is stored in the _off_dmw slot
218 _dmem_write:
219 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
220 	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0	# d0 = longword stored in the _off_dmw slot
221 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
222 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
223 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
224 
225 	global		_imem_read_word		# callout stub: jump to the routine whose offset is stored in the _off_irw slot
226 _imem_read_word:
227 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
228 	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0	# d0 = longword stored in the _off_irw slot
229 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
230 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
231 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
232 
233 	global		_imem_read_long		# callout stub: jump to the routine whose offset is stored in the _off_irl slot
234 _imem_read_long:
235 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
236 	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0	# d0 = longword stored in the _off_irl slot
237 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
238 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
239 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
240 
241 	global		_dmem_read_byte		# callout stub: jump to the routine whose offset is stored in the _off_drb slot
242 _dmem_read_byte:
243 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
244 	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0	# d0 = longword stored in the _off_drb slot
245 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
246 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
247 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
248 
249 	global		_dmem_read_word		# callout stub: jump to the routine whose offset is stored in the _off_drw slot
250 _dmem_read_word:
251 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
252 	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0	# d0 = longword stored in the _off_drw slot
253 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
254 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
255 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
256 
257 	global		_dmem_read_long		# callout stub: jump to the routine whose offset is stored in the _off_drl slot
258 _dmem_read_long:
259 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
260 	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0	# d0 = longword stored in the _off_drl slot
261 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
262 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
263 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
264 
265 	global		_dmem_write_byte	# callout stub: jump to the routine whose offset is stored in the _off_dwb slot
266 _dmem_write_byte:
267 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
268 	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0	# d0 = longword stored in the _off_dwb slot
269 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
270 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
271 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
272 
273 	global		_dmem_write_word	# callout stub: jump to the routine whose offset is stored in the _off_dww slot
274 _dmem_write_word:
275 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
276 	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0	# d0 = longword stored in the _off_dww slot
277 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
278 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
279 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
280 
281 	global		_dmem_write_long	# callout stub: jump to the routine whose offset is stored in the _off_dwl slot
282 _dmem_write_long:
283 	mov.l		%d0,-(%sp)		# save d0; it is used as scratch below
284 	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0	# d0 = longword stored in the _off_dwl slot
285 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (table base + d0) as the address to jump to
286 	mov.l		0x4(%sp),%d0		# reload the saved d0, so the callee sees the caller's d0
287 	rtd		&0x4			# pop the pushed address into the PC and drop the saved d0
288 
289 #
290 # This file contains a set of define statements for constants
291 # in order to promote readability within the corecode itself.
292 #
293 
294 set LOCAL_SIZE,		192			# stack frame size(bytes); used as the link.w displacement by the handlers
295 set LV,			-LOCAL_SIZE		# stack offset: lowest address of the frame, relative to a6
296 # positive offsets from a6: fields of the stacked exception frame (e.g. 1+EXC_VOFF(%a6) in the handlers)
297 set EXC_SR,		0x4			# stack status register
298 set EXC_PC,		0x6			# stack pc
299 set EXC_VOFF,		0xa			# stacked vector offset
300 set EXC_EA,		0xc			# stacked <ea>
301 
302 set EXC_FP,		0x0			# frame pointer
303 # negative offsets from a6: register save areas inside the local frame
304 set EXC_AREGS,		-68			# offset of all address regs
305 set EXC_DREGS,		-100			# offset of all data regs
306 set EXC_FPREGS,		-36			# offset of all fp regs
307 
308 set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
309 set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7 -- NOTE(review): same offset as EXC_A6; confirm the aliasing is intended
310 set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
311 set EXC_A5,		EXC_AREGS+(5*4)
312 set EXC_A4,		EXC_AREGS+(4*4)
313 set EXC_A3,		EXC_AREGS+(3*4)
314 set EXC_A2,		EXC_AREGS+(2*4)
315 set EXC_A1,		EXC_AREGS+(1*4)
316 set EXC_A0,		EXC_AREGS+(0*4)
317 set EXC_D7,		EXC_DREGS+(7*4)
318 set EXC_D6,		EXC_DREGS+(6*4)
319 set EXC_D5,		EXC_DREGS+(5*4)
320 set EXC_D4,		EXC_DREGS+(4*4)
321 set EXC_D3,		EXC_DREGS+(3*4)
322 set EXC_D2,		EXC_DREGS+(2*4)
323 set EXC_D1,		EXC_DREGS+(1*4)
324 set EXC_D0,		EXC_DREGS+(0*4)
325 
326 set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
327 set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
328 set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
329 # four 12-byte operand slots follow; _EX/_SGN/_HI/_LO name byte offsets 0/2/4/8 inside each slot
330 set FP_SCR1,		LV+80			# fp scratch 1
331 set FP_SCR1_EX,		FP_SCR1+0
332 set FP_SCR1_SGN,	FP_SCR1+2
333 set FP_SCR1_HI,		FP_SCR1+4
334 set FP_SCR1_LO,		FP_SCR1+8
335 
336 set FP_SCR0,		LV+68			# fp scratch 0
337 set FP_SCR0_EX,		FP_SCR0+0
338 set FP_SCR0_SGN,	FP_SCR0+2
339 set FP_SCR0_HI,		FP_SCR0+4
340 set FP_SCR0_LO,		FP_SCR0+8
341 
342 set FP_DST,		LV+56			# fp destination operand
343 set FP_DST_EX,		FP_DST+0
344 set FP_DST_SGN,		FP_DST+2
345 set FP_DST_HI,		FP_DST+4
346 set FP_DST_LO,		FP_DST+8
347 
348 set FP_SRC,		LV+44			# fp source operand; the handlers also fsave/frestore the FPU state frame here
349 set FP_SRC_EX,		FP_SRC+0
350 set FP_SRC_SGN,		FP_SRC+2
351 set FP_SRC_HI,		FP_SRC+4
352 set FP_SRC_LO,		FP_SRC+8
353 # USER_FPCR/USER_FPSR/USER_FPIAR are consecutive longwords, written by one fmovm.l in the handlers
354 set USER_FPIAR,		LV+40			# FP instr address register
355 
356 set USER_FPSR,		LV+36			# FP status register
357 set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
358 set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
359 set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
360 set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
361 
362 set USER_FPCR,		LV+32			# FP control register
363 set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
364 set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
365 
366 set L_SCR3,		LV+28			# integer scratch 3
367 set L_SCR2,		LV+24			# integer scratch 2
368 set L_SCR1,		LV+20			# integer scratch 1
369 
370 set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
371 
372 set EXC_TEMP2,		LV+24			# temporary space (same offset as L_SCR2)
373 set EXC_TEMP,		LV+16			# temporary space
374 
375 set DTAG,		LV+15			# destination operand type
376 set STAG,		LV+14			# source operand type
377 
378 set SPCOND_FLG,		LV+10			# flag: special case (see below)
379 
380 set EXC_CC,		LV+8			# saved condition codes
381 set EXC_EXTWPTR,	LV+4			# saved current PC (active)
382 set EXC_EXTWORD,	LV+2			# saved extension word
383 set EXC_CMDREG,		LV+2			# saved extension word (same offset as EXC_EXTWORD)
384 set EXC_OPWORD,		LV+0			# saved operation word
385 
386 ################################
387 
388 # Helpful macros
389 
390 set FTEMP,		0			# offsets within an
391 set FTEMP_EX,		0			# extended precision
392 set FTEMP_SGN,		2			# value saved in memory.
393 set FTEMP_HI,		4
394 set FTEMP_LO,		8
395 set FTEMP_GRS,		12			# presumably guard/round/sticky bits -- TODO confirm against users of FTEMP_GRS
396 
397 set LOCAL,		0			# offsets within an
398 set LOCAL_EX,		0			# extended precision
399 set LOCAL_SGN,		2			# value saved in memory.
400 set LOCAL_HI,		4
401 set LOCAL_LO,		8
402 set LOCAL_GRS,		12			# same offset as FTEMP_GRS
403 
404 set DST,		0			# offsets within an
405 set DST_EX,		0			# extended precision
406 set DST_HI,		4			# value saved in memory.
407 set DST_LO,		8
408 
409 set SRC,		0			# offsets within an
410 set SRC_EX,		0			# extended precision
411 set SRC_HI,		4			# value saved in memory.
412 set SRC_LO,		8
413 # limits below use the extended bias: SGL_LO=EXT_BIAS-126, SGL_HI=EXT_BIAS+127, DBL_LO=EXT_BIAS-1022, DBL_HI=EXT_BIAS+1023
414 set SGL_LO,		0x3f81			# min sgl prec exponent
415 set SGL_HI,		0x407e			# max sgl prec exponent
416 set DBL_LO,		0x3c01			# min dbl prec exponent
417 set DBL_HI,		0x43fe			# max dbl prec exponent
418 set EXT_LO,		0x0			# min ext prec exponent
419 set EXT_HI,		0x7ffe			# max ext prec exponent
420 
421 set EXT_BIAS,		0x3fff			# extended precision bias
422 set SGL_BIAS,		0x007f			# single precision bias
423 set DBL_BIAS,		0x03ff			# double precision bias
424 # operand type tags; the handlers store these in the STAG/DTAG frame bytes
425 set NORM,		0x00			# operand type for STAG/DTAG
426 set ZERO,		0x01			# operand type for STAG/DTAG
427 set INF,		0x02			# operand type for STAG/DTAG
428 set QNAN,		0x03			# operand type for STAG/DTAG
429 set DENORM,		0x04			# operand type for STAG/DTAG
430 set SNAN,		0x05			# operand type for STAG/DTAG
431 set UNNORM,		0x06			# operand type for STAG/DTAG
432 
433 ##################
434 # FPSR/FPCR bits #
435 ##################
436 set neg_bit,		0x3			# negative result (bit number within the cc byte; cf. neg_bmask)
437 set z_bit,		0x2			# zero result
438 set inf_bit,		0x1			# infinite result
439 set nan_bit,		0x0			# NAN result
440 
441 set q_sn_bit,		0x7			# sign bit of quotient byte
442 # bit numbers within the exception status byte; the handlers also test them in the FPCR_ENABLE byte
443 set bsun_bit,		7			# branch on unordered
444 set snan_bit,		6			# signalling NAN
445 set operr_bit,		5			# operand error
446 set ovfl_bit,		4			# overflow
447 set unfl_bit,		3			# underflow
448 set dz_bit,		2			# divide by zero
449 set inex2_bit,		1			# inexact result 2
450 set inex1_bit,		0			# inexact result 1
451 # bit numbers within the accrued exception byte (they match the a*_mask values 0x80..0x08)
452 set aiop_bit,		7			# accrued illegal operation bit
453 set aovfl_bit,		6			# accrued overflow bit
454 set aunfl_bit,		5			# accrued underflow bit
455 set adz_bit,		4			# accrued dz bit
456 set ainex_bit,		3			# accrued inexact bit
457 
458 #############################
459 # FPSR individual bit masks #
460 #############################
461 set neg_mask,		0x08000000		# negative bit mask (lw)
462 set inf_mask,		0x02000000		# infinity bit mask (lw)
463 set z_mask,		0x04000000		# zero bit mask (lw)
464 set nan_mask,		0x01000000		# nan bit mask (lw)
465 # byte-wide forms of the four masks above (the same bits, relative to the top byte of the longword)
466 set neg_bmask,		0x08			# negative bit mask (byte)
467 set inf_bmask,		0x02			# infinity bit mask (byte)
468 set z_bmask,		0x04			# zero bit mask (byte)
469 set nan_bmask,		0x01			# nan bit mask (byte)
470 # exception status byte, as longword masks (bits 15-8)
471 set bsun_mask,		0x00008000		# bsun exception mask
472 set snan_mask,		0x00004000		# snan exception mask
473 set operr_mask,		0x00002000		# operr exception mask
474 set ovfl_mask,		0x00001000		# overflow exception mask
475 set unfl_mask,		0x00000800		# underflow exception mask
476 set dz_mask,		0x00000400		# dz exception mask
477 set inex2_mask,		0x00000200		# inex2 exception mask
478 set inex1_mask,		0x00000100		# inex1 exception mask
479 # accrued exception byte, as longword masks (bits 7-3)
480 set aiop_mask,		0x00000080		# accrued illegal operation
481 set aovfl_mask,		0x00000040		# accrued overflow
482 set aunfl_mask,		0x00000020		# accrued underflow
483 set adz_mask,		0x00000010		# accrued divide by zero
484 set ainex_mask,		0x00000008		# accrued inexact
485 
486 ######################################
487 # FPSR combinations used in the FPSP #
488 ######################################
489 set dzinf_mask,		inf_mask+dz_mask+adz_mask	# each value below is a sum of disjoint single-bit masks
490 set opnan_mask,		nan_mask+operr_mask+aiop_mask
491 set nzi_mask,		0x01ffffff		# AND mask that clears N, Z, and I (and the four bits above N); keeps nan and everything below
492 set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
493 set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask	# unfinx_mask without aunfl_mask
494 set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
495 set inx1a_mask,		inex1_mask+ainex_mask
496 set inx2a_mask,		inex2_mask+ainex_mask
497 set snaniop_mask,	nan_mask+snan_mask+aiop_mask
498 set snaniop2_mask,	snan_mask+aiop_mask	# snaniop_mask without nan_mask
499 set naniop_mask,	nan_mask+aiop_mask
500 set neginf_mask,	neg_mask+inf_mask
501 set infaiop_mask,	inf_mask+aiop_mask
502 set negz_mask,		neg_mask+z_mask
503 set opaop_mask,		operr_mask+aiop_mask
504 set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask	# note: no inex2_mask, unlike unfinx_mask
505 set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask	# note: no inex2_mask, unlike ovfinx_mask
506 
507 #########
508 # misc. #
509 #########
510 set rnd_stky_bit,	29			# stky bit pos in longword
511 
512 set sign_bit,		0x7			# sign bit
513 set signan_bit,		0x6			# signalling nan bit
514 
515 set sgl_thresh,		0x3f81			# minimum sgl exponent (same value as SGL_LO)
516 set dbl_thresh,		0x3c01			# minimum dbl exponent (same value as DBL_LO)
517 
518 set x_mode,		0x0			# extended precision
519 set s_mode,		0x4			# single precision
520 set d_mode,		0x8			# double precision
521 
522 set rn_mode,		0x0			# round-to-nearest
523 set rz_mode,		0x1			# round-to-zero
524 set rm_mode,		0x2			# round-to-minus-infinity
525 set rp_mode,		0x3			# round-to-plus-infinity
526 
527 set mantissalen,	64			# length of mantissa in bits
528 
529 set BYTE,		1			# len(byte) == 1 byte
530 set WORD,		2			# len(word) == 2 bytes
531 set LONG,		4			# len(longword) == 4 bytes
532 # exception vector offsets; the overflow handler stores 0xc4 (== INEX_VEC) into the stacked vector offset
533 set BSUN_VEC,		0xc0			# bsun    vector offset
534 set INEX_VEC,		0xc4			# inexact vector offset
535 set DZ_VEC,		0xc8			# dz      vector offset
536 set UNFL_VEC,		0xcc			# unfl    vector offset
537 set OPERR_VEC,		0xd0			# operr   vector offset
538 set OVFL_VEC,		0xd4			# ovfl    vector offset
539 set SNAN_VEC,		0xd8			# snan    vector offset
540 
541 ###########################
542 # SPecial CONDition FLaGs #
543 ###########################
544 set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
545 set fbsun_flg,		0x02			# flag bit: bsun exception
546 set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
547 set mda7_flg,		0x08			# flag bit: -(a7) <ea>
548 set fmovm_flg,		0x40			# flag bit: fmovm instruction (no matching *_bit constant is defined here)
549 set immed_flg,		0x80			# flag bit: &<data> <ea>
550 # bit numbers of the flag masks above (1 << n gives the matching *_flg value)
551 set ftrapcc_bit,	0x0
552 set fbsun_bit,		0x1
553 set mia7_bit,		0x2
554 set mda7_bit,		0x3
555 set immed_bit,		0x7
556 
557 ##################################
558 # TRANSCENDENTAL "LAST-OP" FLAGS #
559 ##################################
560 set FMUL_OP,		0x0			# fmul instr performed last
561 set FDIV_OP,		0x1			# fdiv instr performed last
562 set FADD_OP,		0x2			# fadd instr performed last
563 set FMOV_OP,		0x3			# fmov instr performed last
564 
565 #############
566 # CONSTANTS #
567 #############
568 T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD (two longwords; presumably one double-precision value -- confirm)
569 T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL (two longwords; presumably one double-precision value -- confirm)
570 # PI and PIBY2 share the same two mantissa longwords and differ only in the exponent word (0x4000 vs 0x3FFF); fourth longword is zero
571 PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
572 PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
573 # TWOBYPI is stored as two longwords only, unlike the four-longword PI/PIBY2 entries
574 TWOBYPI:
575 	long		0x3FE45F30,0x6DC9C883
576 
577 #########################################################################
578 # XDEF ****************************************************************	#
579 #	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
580 #									#
581 #	This handler should be the first code executed upon taking the	#
582 #	FP Overflow exception in an operating system.			#
583 #									#
584 # XREF ****************************************************************	#
585 #	_imem_read_long() - read instruction longword			#
586 #	fix_skewed_ops() - adjust src operand in fsave frame		#
587 #	set_tag_x() - determine optype of src/dst operands		#
588 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
589 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
590 #	load_fpn2() - load dst operand from FP regfile			#
591 #	fout() - emulate an opclass 3 instruction			#
592 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
593 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
594 #	_real_ovfl() - "callout" for Overflow exception enabled code	#
595 #	_real_inex() - "callout" for Inexact exception enabled code	#
596 #	_real_trace() - "callout" for Trace exception code		#
597 #									#
598 # INPUT ***************************************************************	#
599 #	- The system stack contains the FP Ovfl exception stack frame	#
600 #	- The fsave frame contains the source operand			#
601 #									#
602 # OUTPUT **************************************************************	#
603 #	Overflow Exception enabled:					#
604 #	- The system stack is unchanged					#
605 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
606 #	Overflow Exception disabled:					#
607 #	- The system stack is unchanged					#
608 #	- The "exception present" flag in the fsave frame is cleared	#
609 #									#
610 # ALGORITHM ***********************************************************	#
611 #	On the 060, if an FP overflow is present as the result of any	#
612 # instruction, the 060 will take an overflow exception whether the	#
613 # exception is enabled or disabled in the FPCR. For the disabled case,	#
614 # This handler emulates the instruction to determine what the correct	#
615 # default result should be for the operation. This default result is	#
616 # then stored in either the FP regfile, data regfile, or memory.	#
617 # Finally, the handler exits through the "callout" _fpsp_done()		#
618 # denoting that no exceptional conditions exist within the machine.	#
619 #	If the exception is enabled, then this handler must create the	#
620 # exceptional operand and place it in the fsave state frame, and store	#
621 # the default result (only if the instruction is opclass 3). For	#
622 # exceptions enabled, this handler must exit through the "callout"	#
623 # _real_ovfl() so that the operating system enabled overflow handler	#
624 # can handle this case.							#
625 #	Two other conditions exist. First, if overflow was disabled	#
626 # but the inexact exception was enabled, this handler must exit		#
627 # through the "callout" _real_inex() regardless of whether the result	#
628 # was inexact.								#
629 #	Also, in the case of an opclass three instruction where		#
630 # overflow was disabled and the trace exception was enabled, this	#
631 # handler must exit through the "callout" _real_trace().		#
632 #									#
633 #########################################################################
634 
635 	global		_fpsp_ovfl
636 _fpsp_ovfl:
637 # Build the a6 frame, then save the FPU state frame and the registers used as scratch during emulation.
638 #$#	sub.l		&24,%sp			# make room for src/dst
639 
640 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
641 
642 	fsave		FP_SRC(%a6)		# grab the "busy" frame
643 
644 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
645 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
646 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
647 
648 # the FPIAR holds the "current PC" of the faulting instruction
649 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
650 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
651 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
652 	bsr.l		_imem_read_long		# fetch the instruction words
653 	mov.l		%d0,EXC_OPWORD(%a6)	# one longword store fills EXC_OPWORD (LV+0) and EXC_CMDREG (LV+2)
654 
655 ##############################################################################
656 
657 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
658 	bne.w		fovfl_out
659 # not an fmove out: the result of this path is written to an FP register by store_fpreg further down
660 
661 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
662 	bsr.l		fix_skewed_ops		# fix src op
663 
664 # since, I believe, only NORMs and DENORMs can come through here,
665 # maybe we can avoid the subroutine call.
666 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
667 	bsr.l		set_tag_x		# tag the operand type
668 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
669 
670 # bit five of the fp extension word separates the monadic and dyadic operations
671 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
672 # will never take this exception.
673 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
674 	beq.b		fovfl_extract		# monadic
675 
676 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
677 	bsr.l		load_fpn2		# load dst into FP_DST
678 
679 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
680 	bsr.l		set_tag_x		# tag the operand type
681 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
682 	bne.b		fovfl_op2_done		# no
683 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
684 fovfl_op2_done:
685 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
686 
687 fovfl_extract:
688 # set up the arguments for the emulation routine: d0 = rnd prec/mode byte, a0 = src ptr, a1 = dst ptr
689 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
690 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
691 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
692 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
693 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
694 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
695 
696 	clr.l		%d0
697 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
698 
699 	mov.b		1+EXC_CMDREG(%a6),%d1
700 	andi.w		&0x007f,%d1		# extract extension
701 
702 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero cc byte and exception byte (inex1 kept); keep quotient and accrued bytes
703 
704 	fmov.l		&0x0,%fpcr		# zero current control regs
705 	fmov.l		&0x0,%fpsr
706 
707 	lea		FP_SRC(%a6),%a0
708 	lea		FP_DST(%a6),%a1
709 
710 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
711 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
712 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# call tbl_unsupp + fetched value, i.e. the table holds offsets from tbl_unsupp
713 
714 # the operation has been emulated. the result is in fp0.
715 # the EXOP, if an exception occurred, is in fp1.
716 # we must save the default result regardless of whether
717 # traps are enabled or disabled.
718 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# d0 = same 3-bit dst register field extracted before load_fpn2
719 	bsr.l		store_fpreg
720 
721 # the exceptional possibilities we have left ourselves with are ONLY overflow
722 # and inexact. and, the inexact is such that overflow occurred and was disabled
723 # but inexact was enabled.
724 	btst		&ovfl_bit,FPCR_ENABLE(%a6)
725 	bne.b		fovfl_ovfl_on
726 
727 	btst		&inex2_bit,FPCR_ENABLE(%a6)
728 	bne.b		fovfl_inex_on
729 # neither overflow nor inexact is enabled: restore the saved state and leave through _fpsp_done
730 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
731 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
732 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
733 
734 	unlk		%a6
735 #$#	add.l		&24,%sp
736 	bra.l		_fpsp_done
737 
738 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
739 # in fp1. now, simply jump to _real_ovfl()!
740 fovfl_ovfl_on:
741 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
742 # the EXOP overwrites the start of the frame saved at FP_SRC, which is frestore'd below
743 	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status (word at offset 2 of that frame; encoding not shown here)
744 
745 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
746 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
747 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
748 
749 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
750 
751 	unlk		%a6
752 # leave through the overflow callout stub
753 	bra.l		_real_ovfl
754 
755 # overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
756 # we must jump to real_inex().
757 fovfl_inex_on:
758 # same exit sequence as fovfl_ovfl_on, except the stacked vector offset is rewritten and the status word differs
759 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
760 
761 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (== INEX_VEC); only the low byte of the word is written
762 	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status (word at offset 2 of the frame at FP_SRC; encoding not shown here)
763 
764 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
765 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
766 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
767 
768 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
769 
770 	unlk		%a6
771 # leave through the inexact callout stub
772 	bra.l		_real_inex
773 
774 ########################################################################
775 fovfl_out:
776 # fmove-out path: emulate through fout() with a0 = ptr to src operand and d0 = rnd prec/mode byte
777 
778 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
779 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
780 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
781 
782 # the src operand is definitely a NORM(!), so tag it as such
783 	mov.b		&NORM,STAG(%a6)		# set src optype tag
784 
785 	clr.l		%d0
786 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
787 
788 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero the exception status byte (cc, quotient and accrued bytes kept)
789 
790 	fmov.l		&0x0,%fpcr		# zero current control regs
791 	fmov.l		&0x0,%fpsr
792 
793 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
794 
795 	bsr.l		fout
796 # same enable checks as the register-destination path; the exits are shared with it
797 	btst		&ovfl_bit,FPCR_ENABLE(%a6)
798 	bne.w		fovfl_ovfl_on
799 
800 	btst		&inex2_bit,FPCR_ENABLE(%a6)
801 	bne.w		fovfl_inex_on
802 
803 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
804 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
805 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
806 
807 	unlk		%a6
808 #$#	add.l		&24,%sp
809 # a6 is unlinked, so the stacked frame is now addressed off sp: EXC_SR at 0, EXC_VOFF at 0x6, EXC_EA at 0x8
810 	btst		&0x7,(%sp)		# is trace on?
811 	beq.l		_fpsp_done		# no
812 # trace is on: rewrite the stacked frame fields and leave through the trace callout
813 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
814 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
815 	bra.l		_real_trace
816 
817 #########################################################################
818 # XDEF ****************************************************************	#
819 #	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
820 #									#
821 #	This handler should be the first code executed upon taking the	#
822 #	FP Underflow exception in an operating system.			#
823 #									#
824 # XREF ****************************************************************	#
825 #	_imem_read_long() - read instruction longword			#
826 #	fix_skewed_ops() - adjust src operand in fsave frame		#
827 #	set_tag_x() - determine optype of src/dst operands		#
828 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
829 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
830 #	load_fpn2() - load dst operand from FP regfile			#
831 #	fout() - emulate an opclass 3 instruction			#
832 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
833 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
834 #	_real_unfl() - "callout" for Underflow exception enabled code	#
835 #	_real_inex() - "callout" for Inexact exception enabled code	#
836 #	_real_trace() - "callout" for Trace exception code		#
837 #									#
838 # INPUT ***************************************************************	#
839 #	- The system stack contains the FP Unfl exception stack frame	#
840 #	- The fsave frame contains the source operand			#
841 #									#
842 # OUTPUT **************************************************************	#
843 #	Underflow Exception enabled:					#
844 #	- The system stack is unchanged					#
845 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
846 #	Underflow Exception disabled:					#
847 #	- The system stack is unchanged					#
848 #	- The "exception present" flag in the fsave frame is cleared	#
849 #									#
850 # ALGORITHM ***********************************************************	#
851 #	On the 060, if an FP underflow is present as the result of any	#
852 # instruction, the 060 will take an underflow exception whether the	#
853 # exception is enabled or disabled in the FPCR. For the disabled case,	#
854 # This handler emulates the instruction to determine what the correct	#
855 # default result should be for the operation. This default result is	#
856 # then stored in either the FP regfile, data regfile, or memory.	#
857 # Finally, the handler exits through the "callout" _fpsp_done()		#
858 # denoting that no exceptional conditions exist within the machine.	#
859 #	If the exception is enabled, then this handler must create the	#
860 # exceptional operand and place it in the fsave state frame, and store	#
861 # the default result (only if the instruction is opclass 3). For	#
862 # exceptions enabled, this handler must exit through the "callout"	#
863 # _real_unfl() so that the operating system enabled underflow handler	#
864 # can handle this case.							#
865 #	Two other conditions exist. First, if underflow was disabled	#
866 # but the inexact exception was enabled and the result was inexact,	#
867 # this handler must exit through the "callout" _real_inex().		#
868 #									#
869 #	Also, in the case of an opclass three instruction where		#
870 # underflow was disabled and the trace exception was enabled, this	#
871 # handler must exit through the "callout" _real_trace().		#
872 #									#
873 #########################################################################
874 
	global		_fpsp_unfl
# _fpsp_unfl(): FP Underflow exception entry point. Saves machine state in a
# local frame, re-fetches the faulting instruction, and then either branches
# to funfl_out (fmove out) or emulates the opclass 0,2 operation through
# tbl_unsupp, stores the result, and picks the exit "callout".
_fpsp_unfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# save the fetched longword

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		funfl_out		# yes; handled separately


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp ext word separates the monadic and dyadic operations
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
	beq.b		funfl_extract		# monadic

# now, what's left that's not dyadic is fsincos. we can distinguish it
# from all dyadics by the '0110xxx' extension pattern (bit four set).
	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
	bne.b		funfl_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		funfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
funfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# set up the arguments for the emulation routine:
#	d0 = rnd prec/mode, d1 = 7-bit extension (table index),
#	a0 = ptr to src op, a1 = ptr to dst op
funfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

# clear bits 31-24 and 15-9 of the saved FPSR; bits 23-16 and 8-0 are kept.
	andi.l		&0x00ff01ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# NOTE(review): this remark looks inherited from the overflow handler; confirm.
# the table holds offsets relative to tbl_unsupp: fetch the offset for this
# extension, then call the routine pc-relative to the table.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # d0 = dst reg number
	bsr.l		store_fpreg		# store result to FP regfile

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we need to check
# if our emulation, after re-doing the operation, decided that
# no underflow was called for. We do these checks only in
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
# special case will simply exit gracefully with the correct result.

# the exceptional possibilities we have left ourselves with are ONLY underflow
# and inexact. and, the inexact is such that underflow occurred and was disabled
# but inexact was enabled.
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
	bne.b		funfl_unfl_on		# yes

funfl_chkinex:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	bne.b		funfl_inex_on		# yes

# nothing enabled (or nothing enabled actually occurred): restore the saved
# state, tear down the frame, and leave through _fpsp_done().
funfl_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done
987 
# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to get to go to _real_unfl()!
funfl_unfl_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did emulation flag underflow?
	beq.w		funfl_chkinex		# no; bogus unfl, check inexact

# funfl_unfl_on2 is also branched to directly from funfl_out (the fmove out
# path), which skips the bogus-underflow check above.
funfl_unfl_on2:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_unfl
1016 
# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex() if the emulated result was actually inexact.
funfl_inex_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # did emulation flag inexact?
	beq.w		funfl_exit		# no; exit normally

# funfl_inex_on2 is also branched to directly from funfl_out (the fmove out
# path), which skips the "did inexact occur" check above.
funfl_inex_on2:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack

# only the low byte of the vector offset is rewritten here; the rest of the
# format/vector word in the exception frame is left as it was.
	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex
1047 
1048 #######################################################################
# funfl_out: the underflowing instruction was an fmove out (opclass 3).
# The move is emulated by fout(); afterwards the exit is chosen from the
# FPCR enables: underflow -> funfl_unfl_on2, inexact -> funfl_inex_on2,
# otherwise _fpsp_done() or, if tracing, _real_trace().
funfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception byte (bits 15-8) only

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# emulate the fmove out

# note: only the enable bits are tested here; unlike the opclass 0,2 path,
# FPSR_EXCEPT is not consulted before taking the _on2 exits.
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
	bne.w		funfl_unfl_on2		# yes

	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	bne.w		funfl_inex_on2		# yes

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

# after the unlk, (%sp) is the first byte of the exception stack frame.
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

# convert the frame in place to a trace frame before leaving.
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace
1090 
1091 #########################################################################
1092 # XDEF ****************************************************************	#
1093 #	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1094 #		        Data Type" exception.				#
1095 #									#
1096 #	This handler should be the first code executed upon taking the	#
1097 #	FP Unimplemented Data Type exception in an operating system.	#
1098 #									#
1099 # XREF ****************************************************************	#
1100 #	_imem_read_{word,long}() - read instruction word/longword	#
1101 #	fix_skewed_ops() - adjust src operand in fsave frame		#
1102 #	set_tag_x() - determine optype of src/dst operands		#
1103 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1104 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1105 #	load_fpn2() - load dst operand from FP regfile			#
1106 #	load_fpn1() - load src operand from FP regfile			#
1107 #	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
1109 #	_real_inex() - "callout" to operating system inexact handler	#
1110 #	_fpsp_done() - "callout" for exit; work all done		#
1111 #	_real_trace() - "callout" for Trace enabled exception		#
1112 #	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1113 #	_real_snan() - "callout" for SNAN exception			#
1114 #	_real_operr() - "callout" for OPERR exception			#
1115 #	_real_ovfl() - "callout" for OVFL exception			#
1116 #	_real_unfl() - "callout" for UNFL exception			#
1117 #	get_packed() - fetch packed operand from memory			#
1118 #									#
1119 # INPUT ***************************************************************	#
1120 #	- The system stack contains the "Unimp Data Type" stk frame	#
#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
1122 #									#
1123 # OUTPUT **************************************************************	#
1124 #	If Inexact exception (opclass 3):				#
1125 #	- The system stack is changed to an Inexact exception stk frame	#
1126 #	If SNAN exception (opclass 3):					#
1127 #	- The system stack is changed to an SNAN exception stk frame	#
1128 #	If OPERR exception (opclass 3):					#
1129 #	- The system stack is changed to an OPERR exception stk frame	#
1130 #	If OVFL exception (opclass 3):					#
1131 #	- The system stack is changed to an OVFL exception stk frame	#
1132 #	If UNFL exception (opclass 3):					#
1133 #	- The system stack is changed to an UNFL exception stack frame	#
1134 #	If Trace exception enabled:					#
1135 #	- The system stack is changed to a Trace exception stack frame	#
1136 #	Else: (normal case)						#
1137 #	- Correct result has been stored as appropriate			#
1138 #									#
1139 # ALGORITHM ***********************************************************	#
1140 #	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1141 # unimplemented data types. These can be either opclass 0,2 or 3	#
1142 # instructions, and (2) PACKED unimplemented data format instructions	#
1143 # also of opclasses 0,2, or 3.						#
1144 #	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1145 # operand from the fsave state frame and the dst operand (if dyadic)	#
1146 # from the FP register file. The instruction is then emulated by	#
1147 # choosing an emulation routine from a table of routines indexed by	#
1148 # instruction type. Once the instruction has been emulated and result	#
1149 # saved, then we check to see if any enabled exceptions resulted from	#
1150 # instruction emulation. If none, then we exit through the "callout"	#
1151 # _fpsp_done(). If there is an enabled FP exception, then we insert	#
1152 # this exception into the FPU in the fsave state frame and then exit	#
1153 # through _fpsp_done().							#
1154 #	PACKED opclass 0 and 2 is similar in how the instruction is	#
1155 # emulated and exceptions handled. The differences occur in how the	#
1156 # handler loads the packed op (by calling get_packed() routine) and	#
1157 # by the fact that a Trace exception could be pending for PACKED ops.	#
1158 # If a Trace exception is pending, then the current exception stack	#
1159 # frame is changed to a Trace exception stack frame and an exit is	#
1160 # made through _real_trace().						#
1161 #	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1162 # performed by calling the routine fout(). If no exception should occur	#
1163 # as the result of emulation, then an exit either occurs through	#
1164 # _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1165 # (a Trace stack frame must be created here, too). If an FP exception	#
1166 # should occur, then we must create an exception stack frame of that	#
1167 # type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1168 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
1169 # emulation is performed in a similar manner.				#
1170 #									#
1171 #########################################################################
1172 
1173 #
1174 # (1) DENORM and UNNORM (unimplemented) data types:
1175 #
1176 #				post-instruction
1177 #				*****************
1178 #				*      EA	*
1179 #	 pre-instruction	*		*
1180 #	*****************	*****************
1181 #	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1182 #	*****************	*****************
1183 #	*     Next	*	*     Next	*
1184 #	*      PC	*	*      PC	*
1185 #	*****************	*****************
1186 #	*      SR	*	*      SR	*
1187 #	*****************	*****************
1188 #
1189 # (2) PACKED format (unsupported) opclasses two and three:
1190 #	*****************
1191 #	*      EA	*
1192 #	*		*
1193 #	*****************
1194 #	* 0x2 *  0x0dc	*
1195 #	*****************
1196 #	*     Next	*
1197 #	*      PC	*
1198 #	*****************
1199 #	*      SR	*
1200 #	*****************
1201 #
	global		_fpsp_unsupp
# _fpsp_unsupp(): FP "Unimplemented Data Type" exception entry point.
# Saves machine state in a local frame, records the pre-exception a7,
# re-fetches the faulting instruction and dispatches:
#	opclass 3 (fmove out)	-> fu_out
#	packed opclass 2	-> fu_in_pack
#	everything else		-> emulated here through tbl_unsupp
_fpsp_unsupp:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# save fp state

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.b		fu_s
fu_u:
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack
	bra.b		fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# save on stack

fu_cont:

# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

############################

	clr.b		SPCOND_FLG(%a6)		# clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
	bne.w		fu_out			# yes

# Separate packed opclass two instructions.
# the top six bits of the command word are opclass and src format;
# 0x13 = opclass two w/ packed src format.
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
	cmpi.b		%d0,&0x13
	beq.w		fu_in_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
# (the mask also clears bits 31-24 of the saved FPSR).
	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
	lea		FP_SRC(%a6),%a0		# pass ptr to input
	bsr.l		fix_skewed_ops

# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type. if the
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2			# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2:
	mov.b		%d0,STAG(%a6)		# save src optype tag

# the dst reg number is extracted up front; d0 is only consumed (by
# load_fpn2 below) if the operation turns out to be dyadic.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# set up the arguments for the emulation routine:
#	d0 = rnd mode/prec, d1 = 7-bit extension (table index),
#	a0 = ptr to src op, a1 = ptr to dst op
fu_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# the table holds offsets relative to tbl_unsupp: fetch the offset for this
# extension, then call the routine pc-relative to the table.
	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: none (packed doesn't go through here)
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.b		fu_in_ena		# some are enabled

fu_in_cont:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # d0 = dst reg number
	bsr.l		store_fpreg		# store the result

# normal exit: restore the saved state and leave through _fpsp_done().
fu_in_exit:

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	bra.l		_fpsp_done
1344 
# fu_in_ena: at least one FP exception is enabled (d0 = FPCR enable byte).
# Mask it with the exceptions the emulation raised and look for the highest
# priority one that is both set and enabled.
fu_in_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc		# there is at least one set

#
# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    insert an overflow exception state into the fsave frame
#	    (even if the result was exact!) and exit via fu_in_exc_ovfl;
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.b		fu_in_cont		# no

fu_in_ovflchk:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.b		fu_in_cont		# no
	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1367 
1368 #
1369 # An exception occurred and that exception was enabled:
1370 #
1371 #	shift enabled exception field into lo byte of d0;
1372 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1373 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1374 #		/*
1375 #		 * this is the case where we must call _real_inex() now or else
1376 #		 * there will be no other way to pass it the exceptional operand
1377 #		 */
1378 #		call _real_inex();
1379 #	} else {
1380 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1381 #	}
1382 #
1383 fu_in_exc:
1384 	subi.l		&24,%d0			# fix offset to be 0-8
1385 	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1386 	bne.b		fu_in_exc_exit		# no
1387 
1388 # the enabled exception was inexact
1389 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1390 	bne.w		fu_in_exc_unfl		# yes
1391 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1392 	bne.w		fu_in_exc_ovfl		# yes
1393 
1394 # here, we insert the correct fsave status value into the fsave frame for the
1395 # corresponding exception. the operand in the fsave frame should be the original
1396 # src operand.
1397 fu_in_exc_exit:
1398 	mov.l		%d0,-(%sp)		# save d0
1399 	bsr.l		funimp_skew		# skew sgl or dbl inputs
1400 	mov.l		(%sp)+,%d0		# restore d0
1401 
1402 	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1403 
1404 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1405 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1406 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1407 
1408 	frestore	FP_SRC(%a6)		# restore src op
1409 
1410 	unlk		%a6
1411 
1412 	bra.l		_fpsp_done
1413 
1414 tbl_except:
1415 	short		0xe000,0xe006,0xe004,0xe005
1416 	short		0xe003,0xe002,0xe001,0xe001
1417 
1418 fu_in_exc_unfl:
1419 	mov.w		&0x4,%d0
1420 	bra.b		fu_in_exc_exit
1421 fu_in_exc_ovfl:
1422 	mov.w		&0x03,%d0
1423 	bra.b		fu_in_exc_exit
1424 
# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
#
# fix_skewed_ops():
#	input:	a0 = ptr to the operand (LOCAL_EX/LOCAL_HI/LOCAL_LO layout)
#		a6 = handler frame (EXC_CMDREG is read from it)
#	output:	operand at (a0) fixed in place if it was skewed
#	d0 is used as scratch. norm() is called for denorms and is relied
#	upon to leave a0 intact and return the shift amount in d0.
	global		fix_skewed_ops
fix_skewed_ops:
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b		fso_sgl			# yes
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b		fso_dbl			# yes
	rts					# no

# single precision source: only the two boundary exponents need fixing.
fso_sgl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes
	rts					# no

fso_sgl_dnrm_zero:
# the andi.l result also sets the Z flag used by the branch below.
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

# shared by the sgl and dbl paths: keep the sign, zero the exponent.
fso_zero:
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts

# shared by the sgl and dbl paths: keep the sign, force the max exponent.
fso_infnan:
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts

# double precision source: same scheme w/ the double boundary exponents.
fso_dbl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes
	rts					# no

fso_dbl_dnrm_zero:
# a double mantissa spills into LOCAL_LO, so both halves must be zero.
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts
1490 
1491 #################################################################
1492 
# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC. Call _fout() to write out the result and
# to determine which exceptions, if any, to take.
fu_out:

# Separate packed move outs from the UNNORM and DENORM move outs.
# (3-bit destination format field equal to 3 or 7 selects the packed path)
	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
	cmpi.b		%d0,&0x3
	beq.w		fu_out_pack
	cmpi.b		%d0,&0x7
	beq.w		fu_out_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
# call here. just figure out what it is...
	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		fu_out_denorm		# it's a DENORM

	lea		FP_SRC(%a6),%a0
	bsr.l		unnorm_fix		# it's an UNNORM; fix it

	mov.b		%d0,STAG(%a6)		# save tag returned by unnorm_fix

	bra.b		fu_out_cont
fu_out_denorm:
	mov.b		&DENORM,STAG(%a6)
fu_out_cont:

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

# (%a6) holds the interrupted code's a6 (pushed by link). keep a copy in
# EXC_A6 across the fout() call; it is written back at fu_out_done.
	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: none
#	OPERR	: fmove.{b,w,l} out of large UNNORM
#	OVFL	: fmove.{s,d}
#	UNFL	: fmove.{s,d,x}
#	DZ	: none
#	INEX2	: all
#	INEX1	: none (packed doesn't travel through here)

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena		# some are enabled

fu_out_done:

	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed

# on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
# address register was the stack pointer used from user mode, then let's update
# it here. if it was used from supervisor mode, then we have to handle this
# as a special case.
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.b		fu_out_done_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# restore a7
	mov.l		%a0,%usp

fu_out_done_cont:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_out_trace		# yes

	bra.l		_fpsp_done

# is the ea mode pre-decrement of the stack pointer from supervisor mode?
# ("fmov.x fpm,-(a7)") if so, the 12-byte result has to end up on the
# supervisor stack where the exception frame currently sits, so the frame
# is shifted down by 0xc bytes and the result is copied in above it.
fu_out_done_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.b		fu_out_done_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place.
# here, we're counting on the top of the stack to be the old place-holders
# for fp0/fp1 which have already been restored. that way, we can write
# over those destinations with the shifted stack frame.
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

# a6 is reloaded by hand instead of w/ unlk so that sp still points at the
# base of the local frame; all offsets below are LOCAL_SIZE+<frame offset>.
	mov.l		(%a6),%a6		# restore frame pointer

# shift the exception frame (two longwords starting at EXC_SR) down by 0xc
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# sp -> shifted exception frame

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_out_trace		# yes

	bra.l		_fpsp_done
1613 
# fu_out_ena: at least one FP exception is enabled (d0 = FPCR enable byte).
# Mask it with the exceptions fout() raised and look for the highest
# priority one that is both set and enabled.
fu_out_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_out_exc		# there is at least one set

# no enabled exceptions were set.
# if a disabled overflow occurred and inexact was enabled but the result
# was exact, then a branch to _real_inex() is made (through fu_inex).
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_out_done		# no

fu_out_ovflchk:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_out_done		# no
	bra.w		fu_inex			# yes
1629 
1630 #
1631 # The fp move out that took the "Unimplemented Data Type" exception was
1632 # being traced. Since the stack frames are similar, get the "current" PC
1633 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634 #
1635 #		  UNSUPP FRAME		   TRACE FRAME
1636 #		*****************	*****************
1637 #		*      EA	*	*    Current	*
1638 #		*		*	*      PC	*
1639 #		*****************	*****************
1640 #		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1641 #		*****************	*****************
1642 #		*     Next	*	*     Next	*
1643 #		*      PC	*	*      PC	*
1644 #		*****************	*****************
1645 #		*      SR	*	*      SR	*
1646 #		*****************	*****************
1647 #
1648 fu_out_trace:
1649 	mov.w		&0x2024,0x6(%sp)
1650 	fmov.l		%fpiar,0x8(%sp)
1651 	bra.l		_real_trace
1652 
# an exception occurred and that exception was enabled.
# on entry, d0 = bit offset (24-31) returned by bfffo.
fu_out_exc:
	subi.l		&24,%d0			# fix offset to be 0-7

# we don't mess with the existing fsave frame. just re-insert it and
# jump to the "_real_{}()" handler...
# tbl_fu_out holds 16-bit offsets relative to the table itself.
	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
	jmp		(tbl_fu_out.b,%pc,%d0.w*1)

	swbeg		&0x8
# one entry per exception, highest priority first. entries for exceptions
# that cannot occur on this path hold offset zero (the table's own address).
tbl_fu_out:
	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
	short		fu_operr	- tbl_fu_out	# OPERR
	short		fu_ovfl		- tbl_fu_out	# OVFL
	short		fu_unfl		- tbl_fu_out	# UNFL
	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
	short		fu_inex		- tbl_fu_out	# INEX2
	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1672 
# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
# frestore it.
#
# fu_snan: turn the current exception into an SNAN exception and leave
# through _real_snan(). tbl_fu_out does not dispatch here (its SNAN entry
# is marked "can't happen"); presumably reached from another path - confirm.
fu_snan:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6)	# insert SNAN exc status

	frestore	FP_SRC(%a6)		# restore src op w/ new status

	unlk		%a6


	bra.l		_real_snan
1689 
# fu_operr: enabled OPERR on an fmove out. Rewrite the stack frame's
# format/vector word and the fsave status for OPERR, frestore the src
# operand, and leave through _real_operr().
fu_operr:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)	# insert OPERR exc status

	frestore	FP_SRC(%a6)		# restore src op w/ new status

	unlk		%a6


	bra.l		_real_operr
1704 
# fu_ovfl: enabled OVFL on an fmove out. The EXOP (in fp1) replaces the
# operand in the fsave frame, the stack frame's format/vector word and the
# fsave status are rewritten for OVFL, and the exit is through _real_ovfl().
fu_ovfl:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
	mov.w		&0xe005,2+FP_SRC(%a6)	# insert OVFL exc status

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_ovfl
1720 
# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> mode
# was -(sp) (SPCOND_FLG == mda7_flg); that case is handled in fu_unfl_s.
fu_unfl:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.w		fu_unfl_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
	mov.l		%a0,%usp		# to or not...

# common exit: the EXOP (in fp1) replaces the operand in the fsave frame,
# the stack frame's format/vector word and the fsave status are rewritten
# for UNFL, and the exit is through _real_unfl().
fu_unfl_cont:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_SRC(%a6)	# insert UNFL exc status

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_unfl
1749 
1750 fu_unfl_s:
1751 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1752 	bne.b		fu_unfl_cont
1753 
1754 # the extended precision result is still in fp0. but, we need to save it
1755 # somewhere on the stack until we can copy it to its final resting place
1756 # (where the exc frame is currently). make sure it's not at the top of the
1757 # frame or it will get overwritten when the exc stack frame is shifted "down".
1758 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1759 	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1760 
1761 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1762 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1764 
1765 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1766 	mov.w		&0xe003,2+FP_DST(%a6)
1767 
1768 	frestore	FP_DST(%a6)		# restore EXOP
1769 
1770 	mov.l		(%a6),%a6		# restore frame pointer
1771 
1772 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1773 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1774 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1775 
1776 # now, copy the result to the proper place on the stack
1777 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1780 
1781 	add.l		&LOCAL_SIZE-0x8,%sp
1782 
1783 	bra.l		_real_unfl
1784 
1785 # fmove in and out enter here. exits through _real_inex().
1786 fu_inex:
1787 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1788 
1789 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1790 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1792 
1793 	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
1794 	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
1795 
1796 	frestore	FP_SRC(%a6)		# restore EXOP
1797 
1798 	unlk		%a6			# unravel stack frame
1799 
1800 
1801 	bra.l		_real_inex		# hand off to the INEX handler
1802 
1803 #########################################################################
1804 #########################################################################
1805 fu_in_pack:
1806 # opclass two instruction with a PACKED source operand: fetch the src,
1807 # tag the operand(s), then dispatch to the emulation routine via tbl_unsupp.
1808 # I'm not sure at this point what FPSR bits are valid for this instruction.
1809 # so, since the emulation routines re-create them anyways, zero exception field
1810 	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero bits 31-24 and 15-8 (ccodes, exception field)
1811 
1812 	fmov.l		&0x0,%fpcr		# zero current control regs
1813 	fmov.l		&0x0,%fpsr
1814 
1815 	bsr.l		get_packed		# fetch packed src operand
1816 
1817 	lea		FP_SRC(%a6),%a0		# pass ptr to src
1818 	bsr.l		set_tag_x		# set src optype tag
1819 
1820 	mov.b		%d0,STAG(%a6)		# save src optype tag
1821 
1822 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823 
1824 # bit five of the fp extension word separates the monadic and dyadic operations
1825 # at this point
1826 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1827 	beq.b		fu_extract_p		# monadic
1828 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1829 	beq.b		fu_extract_p		# yes, so it's monadic, too
1830 
1831 	bsr.l		load_fpn2		# load dst into FP_DST
1832 
1833 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1834 	bsr.l		set_tag_x		# tag the operand type
1835 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1836 	bne.b		fu_op2_done_p		# no
1837 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1838 fu_op2_done_p:
1839 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1840 
1841 fu_extract_p:
1842 	clr.l		%d0			# upper bytes of d0 stay zero
1843 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1844 
1845 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1846 
1847 	lea		FP_SRC(%a6),%a0		# a0 = ptr to src op
1848 	lea		FP_DST(%a6),%a1		# a1 = ptr to dst op
1849 
1850 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call it (table entry is an offset from tbl_unsupp)
1852 
1853 #
1854 # Exceptions in order of precedence:
1855 #	BSUN	: none
1856 #	SNAN	: all dyadic ops
1857 #	OPERR	: fsqrt(-NORM)
1858 #	OVFL	: all except ftst,fcmp
1859 #	UNFL	: all except ftst,fcmp
1860 #	DZ	: fdiv
1861 #	INEX2	: all except ftst,fcmp
1862 #	INEX1	: all
1863 #
1864 
1865 # we determine the highest priority exception(if any) set by the
1866 # emulation routine that has also been enabled by the user.
1867 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1868 	bne.w		fu_in_ena_p		# some are enabled
1869 
1870 fu_in_cont_p:
1871 # fcmp and ftst do not store any result.
1872 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1873 	andi.b		&0x38,%d0		# extract bits 3-5
1874 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1875 	beq.b		fu_in_exit_p		# yes
1876 
1877 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878 	bsr.l		store_fpreg		# store the result
1879 
1880 fu_in_exit_p:
1881 # normal exit: no enabled exception needs to be reported.
1882 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1883 	bne.w		fu_in_exit_s_p		# supervisor
1884 
1885 	mov.l		EXC_A7(%a6),%a0		# update user a7
1886 	mov.l		%a0,%usp
1887 
1888 fu_in_exit_cont_p:
1889 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1890 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1892 
1893 	unlk		%a6			# unravel stack frame
1894 
1895 	btst		&0x7,(%sp)		# is trace on?
1896 	bne.w		fu_trace_p		# yes
1897 
1898 	bra.l		_fpsp_done		# exit to os
1899 
1900 # the exception occurred in supervisor mode. check to see if the
1901 # addressing mode was (a7)+. if so, we'll need to shift the
1902 # stack frame "up".
1903 fu_in_exit_s_p:
1904 	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905 	beq.b		fu_in_exit_cont_p	# no
1906 
1907 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1908 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1910 
1911 	unlk		%a6			# unravel stack frame
1912 
1913 # shift the stack frame "up". we don't really care about the <ea> field.
1914 	mov.l		0x4(%sp),0x10(%sp)	# copy second longword up 0xc bytes
1915 	mov.l		0x0(%sp),0xc(%sp)	# copy first longword up 0xc bytes
1916 	add.l		&0xc,%sp		# sp -> shifted frame
1917 
1918 	btst		&0x7,(%sp)		# is trace on?
1919 	bne.w		fu_trace_p		# yes
1920 
1921 	bra.l		_fpsp_done		# exit to os
1922 
1923 fu_in_ena_p:
1924 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
1925 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1926 	bne.b		fu_in_exc_p		# at least one was set
1927 
1928 #
1929 # No exceptions occurred that were also enabled. Now:
1930 #
1931 #	if (OVFL && ovfl_disabled && inexact_enabled) {
1932 #	    branch to _real_inex() (even if the result was exact!);
1933 #	} else {
1934 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1935 #	    return;
1936 #	}
1937 #
1938 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939 	beq.w		fu_in_cont_p		# no
1940 
1941 fu_in_ovflchk_p:
1942 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943 	beq.w		fu_in_cont_p		# no
1944 	bra.w		fu_in_exc_ovfl_p	# yes; exit with the OVFL fsave status
1945 
1946 #
1947 # An exception occurred and that exception was enabled:
1948 #
1949 #	shift enabled exception field into lo byte of d0;
1950 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952 #		/*
1953 #		 * this is the case where we must call _real_inex() now or else
1954 #		 * there will be no other way to pass it the exceptional operand
1955 #		 */
1956 #		call _real_inex();
1957 #	} else {
1958 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959 #	}
1960 #
1961 fu_in_exc_p:
1962 	subi.l		&24,%d0			# fix bfffo offset (24-31) to be 0-7
1963 	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1964 	blt.b		fu_in_exc_exit_p	# no
1965 
1966 # the enabled exception was inexact
1967 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968 	bne.w		fu_in_exc_unfl_p	# yes
1969 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970 	bne.w		fu_in_exc_ovfl_p	# yes
1971 
1972 # here, we insert the correct fsave status value into the fsave frame for the
1973 # corresponding exception. the operand in the fsave frame should be the original
1974 # src operand. on entry, d0 holds the exception index (0-7) into tbl_except_p.
1975 # as a reminder for future predicted pain and agony, we are passing in fsave the
1976 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1978 fu_in_exc_exit_p:
1979 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1980 	bne.w		fu_in_exc_exit_s_p	# supervisor
1981 
1982 	mov.l		EXC_A7(%a6),%a0		# update user a7
1983 	mov.l		%a0,%usp
1984 
1985 fu_in_exc_exit_cont_p:
1986 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status
1987 
1988 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1989 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1991 
1992 	frestore	FP_SRC(%a6)		# restore src op
1993 
1994 	unlk		%a6			# unravel stack frame
1995 
1996 	btst		&0x7,(%sp)		# is trace enabled?
1997 	bne.w		fu_trace_p		# yes
1998 
1999 	bra.l		_fpsp_done		# exit to os
2000 
2001 tbl_except_p:
2002 	short		0xe000,0xe006,0xe004,0xe005 # fsave status for index 0-3 (NOTE(review): presumably BSUN,SNAN,OPERR,OVFL)
2003 	short		0xe003,0xe002,0xe001,0xe001 # fsave status for index 4-7 (NOTE(review): presumably UNFL,DZ,INEX2,INEX1)
2004 
2005 fu_in_exc_ovfl_p:
2006 	mov.w		&0x3,%d0		# select tbl_except_p entry 3 (0xe005)
2007 	bra.w		fu_in_exc_exit_p
2008 
2009 fu_in_exc_unfl_p:
2010 	mov.w		&0x4,%d0		# select tbl_except_p entry 4 (0xe003)
2011 	bra.w		fu_in_exc_exit_p
2012 
2013 fu_in_exc_exit_s_p:
2014 	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
2015 	beq.b		fu_in_exc_exit_cont_p	# no
2016 
2017 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status
2018 
2019 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2020 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2022 
2023 	frestore	FP_SRC(%a6)		# restore src op
2024 
2025 	unlk		%a6			# unravel stack frame
2026 
2027 # shift stack frame "up". who cares about <ea> field.
2028 	mov.l		0x4(%sp),0x10(%sp)	# copy second longword up 0xc bytes
2029 	mov.l		0x0(%sp),0xc(%sp)	# copy first longword up 0xc bytes
2030 	add.l		&0xc,%sp		# sp -> shifted frame
2031 
2032 	btst		&0x7,(%sp)		# is trace on?
2033 	bne.b		fu_trace_p		# yes
2034 
2035 	bra.l		_fpsp_done		# exit to os
2036 
2037 #
2038 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2040 # trace stack frame then jump to _real_trace().
2041 #
2042 #		  UNSUPP FRAME		   TRACE FRAME
2043 #		*****************	*****************
2044 #		*      EA	*	*    Current	*
2045 #		*		*	*      PC	*
2046 #		*****************	*****************
2047 #		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
2048 #		*****************	*****************
2049 #		*     Next	*	*     Next	*
2050 #		*      PC	*	*      PC	*
2051 #		*****************	*****************
2052 #		*      SR	*	*      SR	*
2053 #		*****************	*****************
2054 fu_trace_p:
2055 	mov.w		&0x2024,0x6(%sp)	# format 0x2, vector offset 0x024
2056 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" = FPIAR
2057 
2058 	bra.l		_real_trace		# hand off to the Trace handler
2059 
2060 #########################################################
2061 #########################################################
2062 fu_out_pack:
2063 # packed move out: load the src fp register into FP_SRC, tag it, and call
2064 # fout(). enabled exceptions are sorted out afterwards.
2065 # I'm not sure at this point what FPSR bits are valid for this instruction.
2066 # so, since the emulation routines re-create them anyways, zero exception field.
2067 # fmove out doesn't affect ccodes.
2068 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
2069 
2070 	fmov.l		&0x0,%fpcr		# zero current control regs
2071 	fmov.l		&0x0,%fpsr
2072 
2073 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract fp register number
2074 	bsr.l		load_fpn1		# NOTE(review): presumably loads that register into FP_SRC -- confirm
2075 
2076 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2077 # able to detect all operand types.
2078 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2079 	bsr.l		set_tag_x		# tag the operand type
2080 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2081 	bne.b		fu_op2_p		# no
2082 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
2083 
2084 fu_op2_p:
2085 	mov.b		%d0,STAG(%a6)		# save src optype tag
2086 
2087 	clr.l		%d0			# upper bytes of d0 stay zero
2088 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
2089 
2090 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
2091 
2092 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
2093 	bsr.l		fout			# call fmove out routine
2094 
2095 # Exceptions in order of precedence:
2096 #	BSUN	: no
2097 #	SNAN	: yes
2098 #	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099 #	OVFL	: no
2100 #	UNFL	: no
2101 #	DZ	: no
2102 #	INEX2	: yes
2103 #	INEX1	: no
2104 
2105 # determine the highest priority exception(if any) set by the
2106 # emulation routine that has also been enabled by the user.
2107 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2108 	bne.w		fu_out_ena_p		# some are enabled
2109 
2110 fu_out_exit_p:
2111 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2112 
2113 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2114 	bne.b		fu_out_exit_s_p		# supervisor
2115 
2116 	mov.l		EXC_A7(%a6),%a0		# update user a7
2117 	mov.l		%a0,%usp
2118 
2119 fu_out_exit_cont_p:
2120 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2121 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2123 
2124 	unlk		%a6			# unravel stack frame
2125 
2126 	btst		&0x7,(%sp)		# is trace on?
2127 	bne.w		fu_trace_p		# yes
2128 
2129 	bra.l		_fpsp_done		# exit to os
2130 
2131 # the exception occurred in supervisor mode. check to see if the
2132 # addressing mode was -(a7). if so, we'll need to shift the
2133 # stack frame "down".
2134 fu_out_exit_s_p:
2135 	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136 	beq.b		fu_out_exit_cont_p	# no
2137 
2138 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2139 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2141 
2142 	mov.l		(%a6),%a6		# restore frame pointer
2143 # shift the first two longwords of the exception frame "down" 0xc bytes.
2144 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2146 
2147 # now, copy the result to the proper place on the stack
2148 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151 # NOTE(review): LOCAL_SIZE-0x8 looks like locals + saved a6 (4) - 0xc shift -- confirm
2152 	add.l		&LOCAL_SIZE-0x8,%sp
2153 
2154 	btst		&0x7,(%sp)		# is trace on?
2155 	bne.w		fu_trace_p		# yes
2156 
2157 	bra.l		_fpsp_done		# exit to os
2158 
2159 fu_out_ena_p:
2160 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
2161 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2162 	beq.w		fu_out_exit_p		# none enabled & set; normal exit
2163 
2164 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2165 
2166 # an exception occurred and that exception was enabled.
2167 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2168 fu_out_exc_p:
2169 	cmpi.b		%d0,&0x1a		# compare bfffo offset against 0x1a (24+2)
2170 	bgt.w		fu_inex_p2		# offset > 0x1a: treat as INEX
2171 	beq.w		fu_operr_p		# offset = 0x1a: OPERR; else fall into SNAN
2172 
2173 fu_snan_p:
2174 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2175 	bne.b		fu_snan_s_p		# supervisor
2176 
2177 	mov.l		EXC_A7(%a6),%a0		# update user a7
2178 	mov.l		%a0,%usp
2179 	bra.w		fu_snan			# take the common SNAN exit
2180 
2181 fu_snan_s_p:
2182 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2183 	bne.w		fu_snan			# no; take the common SNAN exit
2184 
2185 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186 # the strategy is to move the exception frame "down" 12 bytes. then, we
2187 # can store the default result where the exception frame was.
2188 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2189 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2191 
2192 	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
2193 	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
2194 
2195 	frestore	FP_SRC(%a6)		# restore src operand
2196 
2197 	mov.l		(%a6),%a6		# restore frame pointer
2198 # shift the exception frame "down" 0xc bytes using three longword copies.
2199 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2200 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2201 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2202 
2203 # now, we copy the default result to its proper location
2204 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207 
2208 	add.l		&LOCAL_SIZE-0x8,%sp
2209 
2210 
2211 	bra.l		_real_snan		# hand off to the SNAN handler
2212 
2213 fu_operr_p:
2214 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2215 	bne.w		fu_operr_p_s		# supervisor
2216 
2217 	mov.l		EXC_A7(%a6),%a0		# update user a7
2218 	mov.l		%a0,%usp
2219 	bra.w		fu_operr		# take the common OPERR exit
2220 
2221 fu_operr_p_s:
2222 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2223 	bne.w		fu_operr		# no; take the common OPERR exit
2224 
2225 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226 # the strategy is to move the exception frame "down" 12 bytes. then, we
2227 # can store the default result where the exception frame was.
2228 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2229 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2231 
2232 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
2233 	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
2234 
2235 	frestore	FP_SRC(%a6)		# restore src operand
2236 
2237 	mov.l		(%a6),%a6		# restore frame pointer
2238 # shift the exception frame "down" 0xc bytes using three longword copies.
2239 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2242 
2243 # now, we copy the default result to its proper location
2244 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247 
2248 	add.l		&LOCAL_SIZE-0x8,%sp
2249 
2250 
2251 	bra.l		_real_operr		# hand off to the OPERR handler
2252 
2253 fu_inex_p2:
2254 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2255 	bne.w		fu_inex_s_p2		# supervisor
2256 
2257 	mov.l		EXC_A7(%a6),%a0		# update user a7
2258 	mov.l		%a0,%usp
2259 	bra.w		fu_inex			# take the common INEX exit
2260 
2261 fu_inex_s_p2:
2262 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2263 	bne.w		fu_inex			# no; take the common INEX exit
2264 
2265 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266 # the strategy is to move the exception frame "down" 12 bytes. then, we
2267 # can store the default result where the exception frame was.
2268 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2269 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2271 
2272 	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
2273 	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
2274 
2275 	frestore	FP_SRC(%a6)		# restore src operand
2276 
2277 	mov.l		(%a6),%a6		# restore frame pointer
2278 # shift the exception frame "down" 0xc bytes using three longword copies.
2279 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2282 
2283 # now, we copy the default result to its proper location
2284 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287 
2288 	add.l		&LOCAL_SIZE-0x8,%sp
2289 
2290 
2291 	bra.l		_real_inex		# hand off to the INEX handler
2292 
2293 #########################################################################
2294 
2295 # funimp_skew(): dispatch on the src specifier of the extension word.
2296 # if we're stuffing a source operand back into an fsave frame then we
2297 # have to make sure that for single or double source operands that the
2298 # format stuffed is as weird as the hardware usually makes it.
2299 # any other src format returns with FP_SRC untouched.
2300 	global		funimp_skew
2301 funimp_skew:
2302 	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303 	cmpi.b		%d0,&0x1		# was src sgl?
2304 	beq.b		funimp_skew_sgl		# yes
2305 	cmpi.b		%d0,&0x5		# was src dbl?
2306 	beq.b		funimp_skew_dbl		# yes
2307 	rts					# neither; nothing to skew
2308 
2309 funimp_skew_sgl:
2310 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2311 	andi.w		&0x7fff,%d0		# strip sign
2312 	beq.b		funimp_skew_sgl_not	# exponent is zero; leave as is
2313 	cmpi.w		%d0,&0x3f80		# exponent above 0x3f80?
2314 	bgt.b		funimp_skew_sgl_not	# yes; leave as is
2315 	neg.w		%d0			# make exponent negative
2316 	addi.w		&0x3f81,%d0		# find amt to shift
2317 	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2318 	lsr.l		%d0,%d1			# shift it
2319 	bset		&31,%d1			# set j-bit
2320 	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2321 	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2322 	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2323 funimp_skew_sgl_not:
2324 	rts
2325 
2326 funimp_skew_dbl:
2327 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2328 	andi.w		&0x7fff,%d0		# strip sign
2329 	beq.b		funimp_skew_dbl_not	# exponent is zero; leave as is
2330 	cmpi.w		%d0,&0x3c00		# exponent above 0x3c00?
2331 	bgt.b		funimp_skew_dbl_not	# yes; leave as is
2332 # stash the sign in byte 2 of FP_SRC and store the exponent without it.
2333 	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2334 	smi.b		0x2+FP_SRC(%a6)		# byte 2 = 0xff if sign was set, else 0
2335 	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2336 	clr.l		%d0			# clear g,r,s
2337 	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2338 	mov.w		&0x3c01,%d1		# pass denorm threshold
2339 	bsr.l		dnrm_lp			# denorm it
2340 	mov.w		&0x3c00,%d0		# new exponent
2341 	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2342 	beq.b		fss_dbl_denorm_done	# no
2343 	bset		&15,%d0			# set sign
2344 fss_dbl_denorm_done:
2345 	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2346 	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2347 funimp_skew_dbl_not:
2348 	rts
2349 
2350 #########################################################################
2351 	global		_mem_write2
2352 _mem_write2:
2353 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2354 	beq.l		_dmem_write		# user; let _dmem_write() do the write
2355 	mov.l		0x0(%a0),FP_DST_EX(%a6)	# supervisor: copy the 12 bytes at (a0)
2356 	mov.l		0x4(%a0),FP_DST_HI(%a6)	# into FP_DST instead
2357 	mov.l		0x8(%a0),FP_DST_LO(%a6)
2358 	clr.l		%d1			# NOTE(review): presumably d1 = 0 reports success -- confirm
2359 	rts
2360 
2361 #########################################################################
2362 # XDEF ****************************************************************	#
2363 #	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2364 #			effective address" exception.			#
2365 #									#
2366 #	This handler should be the first code executed upon taking the	#
2367 #	FP Unimplemented Effective Address exception in an operating	#
2368 #	system.								#
2369 #									#
2370 # XREF ****************************************************************	#
2371 #	_imem_read_long() - read instruction longword			#
2372 #	fix_skewed_ops() - adjust src operand in fsave frame		#
2373 #	set_tag_x() - determine optype of src/dst operands		#
2374 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2375 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2376 #	load_fpn2() - load dst operand from FP regfile			#
2377 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
2378 #	decbin() - convert packed data to FP binary data		#
2379 #	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2380 #	_real_access() - "callout" for access error exception		#
2381 #	_mem_read() - read extended immediate operand from memory	#
2382 #	_fpsp_done() - "callout" for exit; work all done		#
2383 #	_real_trace() - "callout" for Trace enabled exception		#
2384 #	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2385 #	fmovm_ctrl() - emulate fmovm control instruction		#
2386 #									#
2387 # INPUT ***************************************************************	#
2388 #	- The system stack contains the "Unimplemented <ea>" stk frame	#
2389 #									#
2390 # OUTPUT **************************************************************	#
2391 #	If access error:						#
2392 #	- The system stack is changed to an access error stack frame	#
2393 #	If FPU disabled:						#
2394 #	- The system stack is changed to an FPU disabled stack frame	#
2395 #	If Trace exception enabled:					#
2396 #	- The system stack is changed to a Trace exception stack frame	#
2397 #	Else: (normal case)						#
2398 #	- None (correct result has been stored as appropriate)		#
2399 #									#
2400 # ALGORITHM ***********************************************************	#
2401 #	This exception handles 3 types of operations:			#
2402 # (1) FP Instructions using extended precision or packed immediate	#
2403 #     addressing mode.							#
2404 # (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2405 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2406 #									#
2407 #	For immediate data operations, the data is read in w/ a		#
2408 # _mem_read() "callout", converted to FP binary (if packed), and used	#
2409 # as the source operand to the instruction specified by the instruction	#
2410 # word. If no FP exception should be reported as a result of the	#
2411 # emulation, then the result is stored to the destination register and	#
2412 # the handler exits through _fpsp_done(). If an enabled exc has been	#
2413 # signalled as a result of emulation, then an fsave state frame		#
2414 # corresponding to the FP exception type must be entered into the 060	#
2415 # FPU before exiting. In either the enabled or disabled cases, we	#
2416 # must also check if a Trace exception is pending, in which case, we	#
2417 # must create a Trace exception stack frame from the current exception	#
2418 # stack frame. If no Trace is pending, we simply exit through		#
2419 # _fpsp_done().								#
2420 #	For "fmovm.x", call the routine fmovm_dynamic() which will	#
2421 # decode and emulate the instruction. No FP exceptions can be pending	#
2422 # as a result of this operation emulation. A Trace exception can be	#
2423 # pending, though, which means the current stack frame must be changed	#
2424 # to a Trace stack frame and an exit made through _real_trace().	#
2425 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2426 # was executed from supervisor mode, this handler must store the FP	#
2427 # register file values to the system stack by itself since		#
2428 # fmovm_dynamic() can't handle this. A normal exit is made through	#
2429 # _fpsp_done().								#
2430 #	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2431 # Again, a Trace exception may be pending and an exit made through	#
2432 # _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2433 #									#
2434 #	Before any of the above is attempted, it must be checked to	#
2435 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2436 # before the "FPU disabled" exception, but the "FPU disabled" exception	#
2437 # has higher priority, we check the disabled bit in the PCR. If set,	#
2438 # then we must create an 8 word "FPU disabled" exception stack frame	#
2439 # from the current 4 word exception stack frame. This includes		#
2440 # reproducing the effective address of the instruction to put on the	#
2441 # new stack frame.							#
2442 #									#
2443 #	In the process of all emulation work, if a _mem_read()		#
2444 # "callout" returns a failing result indicating an access error, then	#
2445 # we must create an access error stack frame from the current stack	#
2446 # frame. This information includes a faulting address and a fault-	#
2447 # status-longword. These are created within this handler.		#
2448 #									#
2449 #########################################################################
2450 
2451 	global		_fpsp_effadd
2452 _fpsp_effadd:
2453 
2454 # This exception type takes priority over the "Line F Emulator"
2455 # exception. Therefore, the FPU could be disabled when entering here.
2456 # So, we must check to see if it's disabled and handle that case separately.
2457 	mov.l		%d0,-(%sp)		# save d0
2458 	movc		%pcr,%d0		# load proc cr
2459 	btst		&0x1,%d0		# is FPU disabled?
2460 	bne.w		iea_disabled		# yes (saved d0 is still on the stack)
2461 	mov.l		(%sp)+,%d0		# restore d0
2462 
2463 	link		%a6,&-LOCAL_SIZE	# init stack frame
2464 
2465 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2466 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
2468 
2469 # PC of instruction that took the exception is the PC in the frame
2470 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471 
2472 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2473 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2474 	bsr.l		_imem_read_long		# fetch the instruction words
2475 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2476 
2477 #########################################################################
2478 
2479 	tst.w		%d0			# is operation fmovem?
2480 	bmi.w		iea_fmovm		# yes (bit 15 of the low word is set)
2481 
2482 #
2483 # here, we will have:
2484 #	fabs	fdabs	fsabs		facos		fmod
2485 #	fadd	fdadd	fsadd		fasin		frem
2486 #	fcmp				fatan		fscale
2487 #	fdiv	fddiv	fsdiv		fatanh		fsin
2488 #	fint				fcos		fsincos
2489 #	fintrz				fcosh		fsinh
2490 #	fmove	fdmove	fsmove		fetox		ftan
2491 #	fmul	fdmul	fsmul		fetoxm1		ftanh
2492 #	fneg	fdneg	fsneg		fgetexp		ftentox
2493 #	fsgldiv				fgetman		ftwotox
2494 #	fsglmul				flog10
2495 #	fsqrt				flog2
2496 #	fsub	fdsub	fssub		flogn
2497 #	ftst				flognp1
2498 # which can all use f<op>.{x,p}
2499 # so, now it's immediate data extended precision AND PACKED FORMAT!
2500 #
2501 iea_op:
2502 	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero bits 31-24 and 15-8 of saved FPSR
2503 
2504 	btst		&0xa,%d0		# is src fmt x or p?
2505 	bne.b		iea_op_pack		# packed
2506 
2507 # extended precision immediate: read it straight into FP_SRC.
2508 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2509 	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
2510 	mov.l		&0xc,%d0		# pass: 12 bytes
2511 	bsr.l		_imem_read		# read extended immediate
2512 
2513 	tst.l		%d1			# did ifetch fail?
2514 	bne.w		iea_iacc		# yes
2515 
2516 	bra.b		iea_op_setsrc		# src op is ready in FP_SRC
2517 
2518 iea_op_pack:
2519 # packed immediate: read it into FP_SRC, then convert unless INF/NAN/ZERO.
2520 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2521 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
2522 	mov.l		&0xc,%d0		# pass: 12 bytes
2523 	bsr.l		_imem_read		# read packed operand
2524 
2525 	tst.l		%d1			# did ifetch fail?
2526 	bne.w		iea_iacc		# yes
2527 
2528 # The packed operand is an INF or a NAN if the exponent field is all ones.
2529 	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
2530 	cmpi.w		%d0,&0x7fff		# INF or NAN?
2531 	beq.b		iea_op_setsrc		# operand is an INF or NAN
2532 
2533 # The packed operand is a zero if the mantissa is all zero, else it's
2534 # a normal packed op.
2535 	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
2536 	andi.b		&0x0f,%d0		# clear all but last nybble
2537 	bne.b		iea_op_gp_not_spec	# not a zero
2538 	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
2539 	bne.b		iea_op_gp_not_spec	# not a zero
2540 	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
2541 	beq.b		iea_op_setsrc		# operand is a ZERO
2542 iea_op_gp_not_spec:
2543 	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
2544 	bsr.l		decbin			# convert to extended
2545 	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
2546 
2547 iea_op_setsrc:
2548 	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
2549 
2550 # FP_SRC now holds the src operand.
2551 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2552 	bsr.l		set_tag_x		# tag the operand type
2553 	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
2554 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2555 	bne.b		iea_op_getdst		# no
2556 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2557 	mov.b		%d0,STAG(%a6)		# set new optype tag
2558 iea_op_getdst:
2559 	clr.b		STORE_FLG(%a6)		# clear "store result" boolean
2560 
2561 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
2562 	beq.b		iea_op_extract		# monadic
2563 	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
2564 	bne.b		iea_op_spec		# yes
2565 
2566 iea_op_loaddst:				# load the dst operand and tag it (dyadic ops and fcmp)
2567 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568 	bsr.l		load_fpn2		# load dst operand (presumably into FP_DST, tagged below)
2569 
2570 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
2571 	bsr.l		set_tag_x		# tag the operand type
2572 	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
2573 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2574 	bne.b		iea_op_extract		# no
2575 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2576 	mov.b		%d0,DTAG(%a6)		# set new optype tag
2577 	bra.b		iea_op_extract		# operands are ready; go emulate
2578 
2579 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2580 iea_op_spec:				# sort out fsincos / ftst / fcmp
2581 	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
2582 	beq.b		iea_op_extract		# yes; no dst operand to load
2583 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584 # store a result. then, only fcmp will branch back and pick up a dst operand.
2585 	st		STORE_FLG(%a6)		# don't store a final result
2586 	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
2587 	beq.b		iea_op_loaddst		# yes; ftst falls through to iea_op_extract
2588 
2589 iea_op_extract:				# call the emulation routine selected by the extension field
2590 	clr.l		%d0			# zero d0 so only the low byte is significant
2591 	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
2592 
2593 	mov.b		1+EXC_CMDREG(%a6),%d1	# low byte of the command word
2594 	andi.w		&0x007f,%d1		# extract extension (7 bits) = index into tbl_unsupp
2595 
2596 	fmov.l		&0x0,%fpcr		# zero FPCR before calling the routine
2597 	fmov.l		&0x0,%fpsr		# zero FPSR before calling the routine
2598 
2599 	lea		FP_SRC(%a6),%a0		# a0 = ptr to src operand
2600 	lea		FP_DST(%a6),%a1		# a1 = ptr to dst operand
2601 
2602 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine's longword table entry
2603 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# entry is an offset from tbl_unsupp; call routine
2604 
2605 #
2606 # Exceptions in order of precedence:
2607 #	BSUN	: none
2608 #	SNAN	: all operations
2609 #	OPERR	: all reg-reg or mem-reg operations that can normally operr
2610 #	OVFL	: same as OPERR
2611 #	UNFL	: same as OPERR
2612 #	DZ	: same as OPERR
2613 #	INEX2	: same as OPERR
2614 #	INEX1	: all packed immediate operations
2615 #
2616 
2617 # we determine the highest priority exception (if any) set by the
2618 # emulation routine that has also been enabled by the user.
2619 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2620 	bne.b		iea_op_ena		# some are enabled; check whether any was raised
2621 
2622 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2623 # these don't save results (STORE_FLG was set non-zero for them in iea_op_spec).
2624 iea_op_save:
2625 	tst.b		STORE_FLG(%a6)		# is the "don't store" flag set?
2626 	bne.b		iea_op_exit1		# yes; skip the store and exit with no frestore
2627 
2628 iea_op_store:				# write the result to the dst fp register
2629 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630 	bsr.l		store_fpreg		# store the result; falls through to iea_op_exit1
2631 
2632 iea_op_exit1:				# normal exit: no exceptional state to frestore
2633 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635 
2636 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2637 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2639 
2640 	unlk		%a6			# unravel the frame
2641 
2642 	btst		&0x7,(%sp)		# is trace on? (bit 7 of the stacked SR's upper byte)
2643 	bne.w		iea_op_trace		# yes; build a trace frame instead
2644 
2645 	bra.l		_fpsp_done		# exit to os
2646 
2647 iea_op_ena:				# d0 = enabled exceptions; see whether any of them was raised
2648 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only the ones both enabled and set
2649 	bfffo		%d0{&24:&8},%d0		# find highest priority exception (first set bit of low byte)
2650 	bne.b		iea_op_exc		# at least one was set
2651 
2652 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2653 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2654 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655 	beq.b		iea_op_save		# no; take the normal save path
2656 
2657 iea_op_ovfl:				# overflow was raised but none of the raised exceptions is enabled
2658 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659 	beq.b		iea_op_store		# no; store the result (STORE_FLG is not consulted here)
2660 	bra.b		iea_op_exc_ovfl		# yes; force an overflow frame
2661 
2662 # an enabled exception occurred. we have to insert the exception type back into
2663 # the machine.
2664 iea_op_exc:				# d0 = bfffo bit offset (24-31) of the highest priority exception
2665 	subi.l		&24,%d0			# fix offset to be 0-7 (index into tbl_iea_except)
2666 	cmpi.b		%d0,&0x6		# is exception INEX?
2667 	bne.b		iea_op_exc_force	# no
2668 
2669 # the enabled exception was inexact. so, if it occurs with an overflow
2670 # or underflow that was disabled, then we have to force an overflow or
2671 # underflow frame.
2672 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673 	bne.b		iea_op_exc_ovfl		# yes
2674 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675 	bne.b		iea_op_exc_unfl		# yes
2676 
2677 iea_op_exc_force:			# insert the table's status word for exception d0 into the frame
2678 	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2679 	bra.b		iea_op_exit2		# exit with frestore
2680 
2681 tbl_iea_except:				# word per exception index 0-7 (bfffo offset - 24); written to 2+FP_SRC
2682 	short		0xe002, 0xe006, 0xe004, 0xe005	# indices 0-3 (0xe005 is also what iea_op_exc_ovfl uses)
2683 	short		0xe003, 0xe002, 0xe001, 0xe001	# indices 4-7 (0xe003 is also what iea_op_exc_unfl uses)
2684 
2685 iea_op_exc_ovfl:			# force an overflow frame
2686 	mov.w		&0xe005,2+FP_SRC(%a6)	# overflow status word into the frame at FP_SRC
2687 	bra.b		iea_op_exit2		# exit with frestore
2688 
2689 iea_op_exc_unfl:			# force an underflow frame
2690 	mov.w		&0xe003,2+FP_SRC(%a6)	# underflow status word; falls through to iea_op_exit2
2691 
2692 iea_op_exit2:				# exceptional exit: same as iea_op_exit1 plus an frestore
2693 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695 
2696 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2697 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2699 
2700 	frestore	FP_SRC(%a6)		# restore exceptional state (frame whose word at 2+FP_SRC was set above)
2701 
2702 	unlk		%a6			# unravel the frame
2703 
2704 	btst		&0x7,(%sp)		# is trace on? (bit 7 of the stacked SR's upper byte)
2705 	bne.b		iea_op_trace		# yes; build a trace frame instead
2706 
2707 	bra.l		_fpsp_done		# exit to os
2708 
2709 #
2710 # The opclass two instruction that took an "Unimplemented Effective Address"
2711 # exception was being traced. Make the "current" PC the FPIAR and put it in
2712 # the trace stack frame then jump to _real_trace().
2713 #
2714 #		 UNIMP EA FRAME		   TRACE FRAME
2715 #		*****************	*****************
2716 #		* 0x0 *  0x0f0	*	*    Current	*
2717 #		*****************	*      PC	*
2718 #		*    Current	*	*****************
2719 #		*      PC	*	* 0x2 *  0x024	*
2720 #		*****************	*****************
2721 #		*      SR	*	*     Next	*
2722 #		*****************	*      PC	*
2723 #					*****************
2724 #					*      SR	*
2725 #					*****************
2726 iea_op_trace:				# turn the 4-word frame at (sp) into the 6-word trace frame
2727 	mov.l		(%sp),-(%sp)		# shift stack frame "down": copy SR,hi(Next PC) 4 bytes lower
2728 	mov.w		0x8(%sp),0x4(%sp)	# move lo(Next PC) next to hi(Next PC)
2729 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2730 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2731 
2732 	bra.l		_real_trace		# exit through the trace "callout"
2733 
2734 #########################################################################
2735 iea_fmovm:				# fmovm emulation; d0 is tested for the ctrl/data selector bit
2736 	btst		&14,%d0			# ctrl or data reg
2737 	beq.w		iea_fmovm_ctrl		# bit clear: control register form
2738 
2739 iea_fmovm_data:				# data register form (fmovm.x dynamic)
2740 
2741 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2742 	bne.b		iea_fmovm_data_s	# supervisor
2743 
2744 iea_fmovm_data_u:			# user mode: a7 is the usp
2745 	mov.l		%usp,%a0		# fetch user stack pointer
2746 	mov.l		%a0,EXC_A7(%a6)		# store current a7
2747 	bsr.l		fmovm_dynamic		# do dynamic fmovm
2748 	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2749 	mov.l		%a0,%usp		# update usp
2750 	bra.w		iea_fmovm_exit		# common exit
2751 
2752 iea_fmovm_data_s:			# supervisor mode: a7 is the system stack holding our frame
2753 	clr.b		SPCOND_FLG(%a6)		# clear special-condition flag before the call
2754 	lea		0x2+EXC_VOFF(%a6),%a0	# address just past the format/vector-offset word
2755 	mov.l		%a0,EXC_A7(%a6)		# use it as the a7 value for the emulation
2756 	bsr.l		fmovm_dynamic		# do dynamic fmovm
2757 
2758 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was mda7_flg reported?
2759 	beq.w		iea_fmovm_data_predec	# yes; handle the predecrement case
2760 	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was mia7_flg reported?
2761 	bne.w		iea_fmovm_exit		# no; common exit. yes: fall into the postincrement case
2762 
2763 # right now, d0 = the size.
2764 # the data has been fetched from the supervisor stack, but we have not
2765 # incremented the stack pointer by the appropriate number of bytes.
2766 # do it here.
2767 iea_fmovm_data_postinc:			# move the exception frame up by d0 (= size) bytes
2768 	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2769 	bne.b		iea_fmovm_data_pi_trace	# yes; build a trace frame instead
2770 
2771 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# relocate SR
2772 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # Next PC becomes the frame PC
2773 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# stk fmt = 0x0; voff = 0x0f0
2774 
2775 	lea		(EXC_SR,%a6,%d0),%a0	# a0 = address of the relocated frame
2776 	mov.l		%a0,EXC_SR(%a6)		# stash it in the old SR slot; popped into sp below
2777 
2778 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2779 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2781 
2782 	unlk		%a6			# unravel the frame
2783 	mov.l		(%sp)+,%sp		# load the stashed frame address as the new sp
2784 	bra.l		_fpsp_done		# exit to os
2785 
2786 iea_fmovm_data_pi_trace:		# as postinc, but build a trace frame 4 bytes lower
2787 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)	# relocate SR
2788 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)	# Next PC becomes the frame PC
2789 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)		# stk fmt = 0x2; voff = 0x024
2790 	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)	# Current PC follows the format word
2791 
2792 	lea		(EXC_SR-0x4,%a6,%d0),%a0	# a0 = address of the new trace frame
2793 	mov.l		%a0,EXC_SR(%a6)		# stash it in the old SR slot; popped into sp below
2794 
2795 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2796 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2798 
2799 	unlk		%a6			# unravel the frame
2800 	mov.l		(%sp)+,%sp		# load the stashed frame address as the new sp
2801 	bra.l		_real_trace		# exit through the trace "callout"
2802 
2803 # right now, d0 = the size and d1 = the strg.
2804 iea_fmovm_data_predec:			# move the exception frame down by size bytes; registers are stored at p3
2805 	mov.b		%d1,EXC_VOFF(%a6)	# store strg (EXC_VOFF word is reused as scratch)
2806 	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
2807 
2808 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2809 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2811 
2812 	mov.l		(%a6),-(%sp)		# make a copy of a6
2813 	mov.l		%d0,-(%sp)		# save d0
2814 	mov.l		%d1,-(%sp)		# save d1
2815 	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
2816 
2817 	clr.l		%d0			# zero-extend the size byte
2818 	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
2819 	neg.l		%d0			# get negative of size
2820 
2821 	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2822 	beq.b		iea_fmovm_data_p2	# no; rebuild a plain 4-word frame
2823 
2824 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)	# trace frame: relocate SR
2825 	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)	# Current PC follows the format word
2826 	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)		# pop Next PC copy into the frame PC
2827 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)		# stk fmt = 0x2; voff = 0x024
2828 
2829 	pea		(%a6,%d0)		# create final sp
2830 	bra.b		iea_fmovm_data_p3	# go store the fp registers
2831 
2832 iea_fmovm_data_p2:			# trace off: relocate the 4-word frame down by size bytes
2833 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# relocate SR
2834 	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# pop Next PC copy into the frame PC
2835 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# stk fmt = 0x0; voff = 0x0f0
2836 
2837 	pea		(0x4,%a6,%d0)		# create final sp
2838 
2839 iea_fmovm_data_p3:			# store each fp register selected in strg above the relocated frame
2840 	clr.l		%d1			# zero-extend the strg byte
2841 	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
2842 
2843 	tst.b		%d1			# is the top strg bit set?
2844 	bpl.b		fm_1			# no; skip this register
2845 	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)	# store register for mask 0x80 at a6 + 0xc + d0
2846 	addi.l		&0xc,%d0		# advance 12 bytes to the next slot
2847 fm_1:
2848 	lsl.b		&0x1,%d1		# move next strg bit into the sign position
2849 	bpl.b		fm_2			# clear; skip this register
2850 	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)	# store register for mask 0x40
2851 	addi.l		&0xc,%d0		# advance to the next slot
2852 fm_2:
2853 	lsl.b		&0x1,%d1		# next strg bit
2854 	bpl.b		fm_3			# clear; skip this register
2855 	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)	# store register for mask 0x20
2856 	addi.l		&0xc,%d0		# advance to the next slot
2857 fm_3:
2858 	lsl.b		&0x1,%d1		# next strg bit
2859 	bpl.b		fm_4			# clear; skip this register
2860 	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)	# store register for mask 0x10
2861 	addi.l		&0xc,%d0		# advance to the next slot
2862 fm_4:
2863 	lsl.b		&0x1,%d1		# next strg bit
2864 	bpl.b		fm_5			# clear; skip this register
2865 	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)	# store register for mask 0x08
2866 	addi.l		&0xc,%d0		# advance to the next slot
2867 fm_5:
2868 	lsl.b		&0x1,%d1		# next strg bit
2869 	bpl.b		fm_6			# clear; skip this register
2870 	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)	# store register for mask 0x04
2871 	addi.l		&0xc,%d0		# advance to the next slot
2872 fm_6:
2873 	lsl.b		&0x1,%d1		# next strg bit
2874 	bpl.b		fm_7			# clear; skip this register
2875 	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)	# store register for mask 0x02
2876 	addi.l		&0xc,%d0		# advance to the next slot
2877 fm_7:
2878 	lsl.b		&0x1,%d1		# last strg bit
2879 	bpl.b		fm_end			# clear; nothing more to store
2880 	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)	# store register for mask 0x01
2881 fm_end:					# (sp) = final sp, then saved d1, d0 and the a6 copy
2882 	mov.l		0x4(%sp),%d1		# restore d1
2883 	mov.l		0x8(%sp),%d0		# restore d0
2884 	mov.l		0xc(%sp),%a6		# restore a6
2885 	mov.l		(%sp)+,%sp		# switch to the final sp created by the pea
2886 
2887 	btst		&0x7,(%sp)		# is trace enabled?
2888 	beq.l		_fpsp_done		# no; exit to os
2889 	bra.l		_real_trace		# yes; exit through the trace "callout"
2890 
2891 #########################################################################
2892 iea_fmovm_ctrl:				# fmovm of control registers
2893 
2894 	bsr.l		fmovm_ctrl		# load ctrl regs
2895 
2896 iea_fmovm_exit:				# common fmovm exit (also reached from the data register paths)
2897 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2898 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2900 
2901 	btst		&0x7,EXC_SR(%a6)	# is trace on?
2902 	bne.b		iea_fmovm_trace		# yes
2903 
2904 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905 
2906 	unlk		%a6			# unravel the frame
2907 
2908 	bra.l		_fpsp_done		# exit to os
2909 
2910 #
2911 # The control reg instruction that took an "Unimplemented Effective Address"
2912 # exception was being traced. The "Current PC" for the trace frame is the
2913 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914 # After fixing the stack frame, jump to _real_trace().
2915 #
2916 #		 UNIMP EA FRAME		   TRACE FRAME
2917 #		*****************	*****************
2918 #		* 0x0 *  0x0f0	*	*    Current	*
2919 #		*****************	*      PC	*
2920 #		*    Current	*	*****************
2921 #		*      PC	*	* 0x2 *  0x024	*
2922 #		*****************	*****************
2923 #		*      SR	*	*     Next	*
2924 #		*****************	*      PC	*
2925 #					*****************
2926 #					*      SR	*
2927 #					*****************
2928 # this ain't a pretty solution, but it works:
2929 # -restore a6 (not with unlk)
2930 # -shift stack frame down over where old a6 used to be
2931 # -add LOCAL_SIZE to stack pointer
2932 iea_fmovm_trace:			# build the trace frame over the old a6 slot, then drop the locals
2933 	mov.l		(%a6),%a6		# restore frame pointer
2934 	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)	# SR moves down to where old a6 was
2935 	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)	# Current PC (PC stacked for Unimp EA)
2936 	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)	# Next PC (from EXC_EXTWPTR)
2937 	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938 	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2939 
2940 	bra.l		_real_trace		# exit through the trace "callout"
2941 
2942 #########################################################################
2943 # The FPU is disabled and so we should really have taken the "Line
2944 # F Emulator" exception. So, here we create an 8-word stack frame
2945 # from our 4-word stack frame. This means we must calculate the length of
2946 # the faulting instruction to get the "next PC". This is trivial for
2947 # immediate operands but requires some extra work for fmovm dynamic
2948 # which can use most addressing modes.
2949 iea_disabled:				# compute the instruction length, then build the 8-word frame
2950 	mov.l		(%sp)+,%d0		# restore d0
2951 
2952 	link		%a6,&-LOCAL_SIZE	# init stack frame
2953 
2954 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2955 
2956 # PC of instruction that took the exception is the PC in the frame
2957 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2959 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2960 	bsr.l		_imem_read_long		# fetch the instruction words
2961 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2962 
2963 	tst.w		%d0			# is instr fmovm? (sign bit of the low word)
2964 	bmi.b		iea_dis_fmovm		# yes
2965 # instruction is using an extended precision immediate operand. Therefore,
2966 # the total instruction length is 16 bytes.
2967 iea_dis_immed:
2968 	mov.l		&0x10,%d0		# 16 bytes of instruction
2969 	bra.b		iea_dis_cont
2970 iea_dis_fmovm:
2971 	btst		&0xe,%d0		# is instr fmovm ctrl
2972 	bne.b		iea_dis_fmovm_data	# no
2973 # the instruction is a fmovm.l with 2 or 3 registers.
2974 	bfextu		%d0{&19:&3},%d1		# extract the 3-bit control register select field
2975 	mov.l		&0xc,%d0		# length when 2 registers are moved: 12 bytes
2976 	cmpi.b		%d1,&0x7		# move all regs?
2977 	bne.b		iea_dis_cont		# no; length stays 12
2978 	addq.l		&0x4,%d0		# yes; one more longword: 16 bytes
2979 	bra.b		iea_dis_cont
2980 # the instruction is an fmovm.x dynamic which can use many addressing
2981 # modes and thus can have several different total instruction lengths.
2982 # call fmovm_calc_ea which will go through the ea calc process and,
2983 # as a by-product, will tell us how long the instruction is.
2984 iea_dis_fmovm_data:
2985 	clr.l		%d0
2986 	bsr.l		fmovm_calc_ea
2987 	mov.l		EXC_EXTWPTR(%a6),%d0	# extension word pointer after the ea calc
2988 	sub.l		EXC_PC(%a6),%d0		# minus the instruction address = instruction length
2989 iea_dis_cont:
2990 	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value (instruction length) in the voff word
2991 
2992 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2993 
2994 	unlk		%a6
2995 
2996 # here, we actually create the 8-word frame from the 4-word frame,
2997 # with the "next PC" as additional info.
2998 # the <ea> field is left undefined.
2999 	subq.l		&0x8,%sp		# make room for new stack
3000 	mov.l		%d0,-(%sp)		# save d0
3001 	mov.w		0xc(%sp),0x4(%sp)	# move SR
3002 	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3003 	clr.l		%d0
3004 	mov.w		0x12(%sp),%d0		# fetch the instruction length stored in the old voff word
3005 	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3006 	add.l		%d0,0x6(%sp)		# make Next PC
3007 	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3008 	mov.l		(%sp)+,%d0		# restore d0
3009 
3010 	bra.l		_real_fpu_disabled
3011 
3012 ##########
3013 
3014 iea_iacc:				# instruction fetch failed: build an access error frame
3015 	movc		%pcr,%d0		# read the processor configuration register
3016 	btst		&0x1,%d0		# PCR bit 1 set? (presumably "FPU disabled" - confirm)
3017 	bne.b		iea_iacc_cont		# yes; skip restoring the fp state
3018 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3020 iea_iacc_cont:
3021 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3022 
3023 	unlk		%a6			# unravel the frame
3024 
3025 	subq.w		&0x8,%sp		# make stack frame bigger
3026 	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3027 	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3028 	mov.w		&0x4008,0x6(%sp)	# store voff
3029 	mov.l		0x2(%sp),0x8(%sp)	# store ea (a copy of the frame PC)
3030 	mov.l		&0x09428001,0xc(%sp)	# store fslw
3031 
3032 iea_acc_done:				# shared by iea_iacc and iea_dacc: finish the fslw and exit
3033 	btst		&0x5,(%sp)		# user or supervisor mode?
3034 	beq.b		iea_acc_done2		# user
3035 	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3036 
3037 iea_acc_done2:
3038 	bra.l		_real_access		# exit through the access error "callout"
3039 
3040 iea_dacc:				# build an access error frame; a0 and d0 are stored into it below
3041 	lea		-LOCAL_SIZE(%a6),%sp	# reset sp to the bottom of the local frame
3042 
3043 	movc		%pcr,%d1		# read the processor configuration register
3044 	btst		&0x1,%d1		# PCR bit 1 set? (presumably "FPU disabled" - confirm)
3045 	bne.b		iea_dacc_cont		# yes; skip restoring the fp state
3046 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3047 	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048 iea_dacc_cont:
3049 	mov.l		(%a6),%a6		# restore frame pointer (sp is left where it is)
3050 
3051 	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)	# move SR,hi(PC) down 8 bytes
3052 	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)	# move lo(PC) down 8 bytes
3053 	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)		# store voff (same value as in iea_iacc)
3054 	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)			# store a0 in the ea field
3055 	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)			# store d0.w as the upper fslw word
3056 	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)		# store the lower fslw word
3057 
3058 	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059 	add.w		&LOCAL_SIZE-0x4,%sp	# point sp at the new frame (4 bytes below the old a6 slot)
3060 
3061 	bra.b		iea_acc_done		# finish the fslw and exit
3062 
3063 #########################################################################
3064 # XDEF ****************************************************************	#
3065 #	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3066 #									#
3067 #	This handler should be the first code executed upon taking the	#
3068 #	FP Operand Error exception in an operating system.		#
3069 #									#
3070 # XREF ****************************************************************	#
3071 #	_imem_read_long() - read instruction longword			#
3072 #	fix_skewed_ops() - adjust src operand in fsave frame		#
3073 #	_real_operr() - "callout" to operating system operr handler	#
3074 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3075 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3076 #	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3077 #									#
3078 # INPUT ***************************************************************	#
3079 #	- The system stack contains the FP Operr exception frame	#
3080 #	- The fsave frame contains the source operand			#
3081 #									#
3082 # OUTPUT **************************************************************	#
3083 #	No access error:						#
3084 #	- The system stack is unchanged					#
3085 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
3086 #									#
3087 # ALGORITHM ***********************************************************	#
3088 #	In a system where the FP Operr exception is enabled, the goal	#
3089 # is to get to the handler specified at _real_operr(). But, on the 060,	#
3090 # for opclass zero and two instruction taking this exception, the	#
3091 # input operand in the fsave frame may be incorrect for some cases	#
3092 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
3093 # do just this and then exits through _real_operr().			#
3094 #	For opclass 3 instructions, the 060 doesn't store the default	#
3095 # operr result out to memory or data register file as it should.	#
3096 # This code must emulate the move out before finally exiting through	#
3097 # _real_operr(). The move out, if to memory, is performed using		#
3098 # _mem_write() "callout" routines that may return a failing result.	#
3099 # In this special case, the handler must exit through facc_out()	#
3100 # which creates an access error stack frame from the current operr	#
3101 # stack frame.								#
3102 #									#
3103 #########################################################################
3104 
3105 	global		_fpsp_operr
3106 _fpsp_operr:				# FP Operand Error entry point
3107 
3108 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3109 
3110 	fsave		FP_SRC(%a6)		# grab the "busy" frame
3111 
3112 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3113 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3115 
3116 # the FPIAR holds the "current PC" of the faulting instruction
3117 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3118 
3119 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3120 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3121 	bsr.l		_imem_read_long		# fetch the instruction words
3122 	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched longword (d0 is reused in foperr_out)
3123 
3124 ##############################################################################
3125 
3126 	btst		&13,%d0			# is instr an fmove out?
3127 	bne.b		foperr_out		# fmove out
3128 
3129 
3130 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131 # this would be the case for opclass two operations with a source infinity or
3132 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133 # cause an operr so we don't need to check for them here.
3134 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3135 	bsr.l		fix_skewed_ops		# fix src op
3136 
3137 foperr_exit:				# common exit, also used by the fmove out paths
3138 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3139 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3141 
3142 	frestore	FP_SRC(%a6)		# put the fsave frame back
3143 
3144 	unlk		%a6			# unravel the frame
3145 	bra.l		_real_operr		# exit through the operr "callout"
3146 
3147 ########################################################################
3148 
3149 #
3150 # the hardware does not save the default result to memory on enabled
3151 # operand error exceptions. we do this here before passing control to
3152 # the user operand error handler.
3153 #
3154 # byte, word, and long destination format operations can pass
3155 # through here. we simply need to test the sign of the src
3156 # operand and save the appropriate minimum or maximum integer value
3157 # to the effective address as pointed to by the stacked effective address.
3158 #
3159 # although packed opclass three operations can take operand error
3160 # exceptions, they won't pass through here since they are caught
3161 # first by the unsupported data format exception handler. that handler
3162 # sends them directly to _real_operr() if necessary.
3163 #
3164 foperr_out:				# build the default result in L_SCR1, then dispatch on dst format
3165 
3166 	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3167 	andi.w		&0x7fff,%d1		# strip the sign bit
3168 	cmpi.w		%d1,&0x7fff		# is the exponent all ones?
3169 	bne.b		foperr_out_not_qnan	# no; operand is neither INF nor NAN
3170 # the operand is either an infinity or a QNAN.
3171 	tst.l		FP_SRC_LO(%a6)		# any low mantissa bits set?
3172 	bne.b		foperr_out_qnan		# yes; it is a NAN
3173 	mov.l		FP_SRC_HI(%a6),%d1
3174 	andi.l		&0x7fffffff,%d1		# any high mantissa bits set below the top bit?
3175 	beq.b		foperr_out_not_qnan	# no; it is an infinity
3176 foperr_out_qnan:
3177 	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6) # default result = upper mantissa longword of the NAN
3178 	bra.b		foperr_out_jmp
3179 
3180 foperr_out_not_qnan:
3181 	mov.l		&0x7fffffff,%d1		# default result for a positive operand
3182 	tst.b		FP_SRC_EX(%a6)		# test the sign of the operand
3183 	bpl.b		foperr_out_not_qnan2	# positive
3184 	addq.l		&0x1,%d1		# negative: 0x80000000
3185 foperr_out_not_qnan2:
3186 	mov.l		%d1,L_SCR1(%a6)		# save the default result
3187 
3188 foperr_out_jmp:				# d0 still holds the instruction longword fetched at entry
3189 	bfextu		%d0{&19:&3},%d0		# extract dst format field
3190 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3191 	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0	# fetch handler offset for this format
3192 	jmp		(tbl_operr.b,%pc,%a0)	# offset is relative to tbl_operr
3193 
3194 tbl_operr:				# handler offsets indexed by dst format; a zero offset marks an unexpected format
3195 	short		foperr_out_l - tbl_operr # long word integer
3196 	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3197 	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3198 	short		foperr_exit  - tbl_operr # packed won't enter here
3199 	short		foperr_out_w - tbl_operr # word integer
3200 	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3201 	short		foperr_out_b - tbl_operr # byte integer
3202 	short		tbl_operr    - tbl_operr # packed won't enter here
3203 
3204 foperr_out_b:				# byte destination; d1 = <ea> mode,reg
3205 	mov.b		L_SCR1(%a6),%d0		# load default result (first byte of L_SCR1)
3206 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3207 	ble.b		foperr_out_b_save_dn	# yes
3208 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3209 	bsr.l		_dmem_write_byte	# write the default result
3210 
3211 	tst.l		%d1			# did dstore fail?
3212 	bne.l		facc_out_b		# yes
3213 
3214 	bra.w		foperr_exit
3215 foperr_out_b_save_dn:
3216 	andi.w		&0x0007,%d1		# keep the register number
3217 	bsr.l		store_dreg_b		# store result to regfile
3218 	bra.w		foperr_exit
3219 
3220 foperr_out_w:				# word destination; d1 = <ea> mode,reg
3221 	mov.w		L_SCR1(%a6),%d0		# load default result (first word of L_SCR1)
3222 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3223 	ble.b		foperr_out_w_save_dn	# yes
3224 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3225 	bsr.l		_dmem_write_word	# write the default result
3226 
3227 	tst.l		%d1			# did dstore fail?
3228 	bne.l		facc_out_w		# yes
3229 
3230 	bra.w		foperr_exit
3231 foperr_out_w_save_dn:
3232 	andi.w		&0x0007,%d1		# keep the register number
3233 	bsr.l		store_dreg_w		# store result to regfile
3234 	bra.w		foperr_exit
3235 
3236 foperr_out_l:				# long word destination; d1 = <ea> mode,reg
3237 	mov.l		L_SCR1(%a6),%d0		# load default result (all of L_SCR1)
3238 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3239 	ble.b		foperr_out_l_save_dn	# yes
3240 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3241 	bsr.l		_dmem_write_long	# write the default result
3242 
3243 	tst.l		%d1			# did dstore fail?
3244 	bne.l		facc_out_l		# yes
3245 
3246 	bra.w		foperr_exit
3247 foperr_out_l_save_dn:
3248 	andi.w		&0x0007,%d1		# keep the register number
3249 	bsr.l		store_dreg_l		# store result to regfile
3250 	bra.w		foperr_exit
3251 
3252 #########################################################################
3253 # XDEF ****************************************************************	#
3254 #	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3255 #									#
3256 #	This handler should be the first code executed upon taking the	#
3257 #	FP Signalling NAN exception in an operating system.		#
3258 #									#
3259 # XREF ****************************************************************	#
3260 #	_imem_read_long() - read instruction longword			#
3261 #	fix_skewed_ops() - adjust src operand in fsave frame		#
3262 #	_real_snan() - "callout" to operating system SNAN handler	#
3263 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3264 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3265 #	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3266 #	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3267 #									#
3268 # INPUT ***************************************************************	#
3269 #	- The system stack contains the FP SNAN exception frame		#
3270 #	- The fsave frame contains the source operand			#
3271 #									#
3272 # OUTPUT **************************************************************	#
3273 #	No access error:						#
3274 #	- The system stack is unchanged					#
3275 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
3276 #									#
3277 # ALGORITHM ***********************************************************	#
3278 #	In a system where the FP SNAN exception is enabled, the goal	#
3279 # is to get to the handler specified at _real_snan(). But, on the 060,	#
3280 # for opclass zero and two instructions taking this exception, the	#
3281 # input operand in the fsave frame may be incorrect for some cases	#
3282 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
3283 # do just this and then exits through _real_snan().			#
3284 #	For opclass 3 instructions, the 060 doesn't store the default	#
3285 # SNAN result out to memory or data register file as it should.		#
3286 # This code must emulate the move out before finally exiting through	#
3287 # _real_snan(). The move out, if to memory, is performed using		#
3288 # _mem_write() "callout" routines that may return a failing result.	#
3289 # In this special case, the handler must exit through facc_out()	#
3290 # which creates an access error stack frame from the current SNAN	#
3291 # stack frame.								#
3292 #	For the case of an extended precision opclass 3 instruction,	#
3293 # if the effective addressing mode was -() or ()+, then the address	#
3294 # register must get updated by calling _calc_ea_fout(). If the <ea>	#
3295 # was -(a7) from supervisor mode, then the exception frame currently	#
3296 # on the system stack must be carefully moved "down" to make room	#
3297 # for the operand being moved.						#
3298 #									#
3299 #########################################################################
3300 
3301 	global		_fpsp_snan
3302 _fpsp_snan:				# FP Signalling NAN entry point
3303 
3304 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3305 
3306 	fsave		FP_SRC(%a6)		# grab the "busy" frame
3307 
3308 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3309 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3311 
3312 # the FPIAR holds the "current PC" of the faulting instruction
3313 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3314 
3315 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3316 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3317 	bsr.l		_imem_read_long		# fetch the instruction words
3318 	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched longword (d0 is reused in fsnan_out)
3319 
3320 ##############################################################################
3321 
3322 	btst		&13,%d0			# is instr an fmove out?
3323 	bne.w		fsnan_out		# fmove out
3324 
3325 
3326 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327 # this would be the case for opclass two operations with a source infinity or
3328 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329 # fixed here.
3330 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3331 	bsr.l		fix_skewed_ops		# fix src op
3332 
3333 fsnan_exit:				# common exit, also used by the fmove out paths
3334 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3335 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3337 
3338 	frestore	FP_SRC(%a6)		# put the fsave frame back
3339 
3340 	unlk		%a6			# unravel the frame
3341 	bra.l		_real_snan		# exit through the SNAN "callout"
3342 
3343 ########################################################################
3344 
3345 #
3346 # the hardware does not save the default result to memory on enabled
3347 # snan exceptions. we do this here before passing control to
3348 # the user snan handler.
3349 #
3350 # byte, word, long, single, double, extended, and packed destination format
3351 # operations can pass through here. since packed format operations already
3352 # were handled by fpsp_unsupp(), then we need to do nothing else for them here.
3353 # for byte, word, and long, we simply take the upper byte, word, or longword
3354 # of the SNAN mantissa, set the SNAN bit in it, and save that value to a data
3355 # register or to the effective address given by the stacked effective address.
3356 #
3357 fsnan_out:				# fmove out: dispatch on dst format; d0 = fetched instruction longword
3358 
3359 	bfextu		%d0{&19:&3},%d0		# extract dst format field
3360 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3361 	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0	# fetch handler offset for this format
3362 	jmp		(tbl_snan.b,%pc,%a0)	# offset is relative to tbl_snan
3363 
3364 tbl_snan:				# handler offsets indexed by dst format field
3365 	short		fsnan_out_l - tbl_snan # long word integer
3366 	short		fsnan_out_s - tbl_snan # single precision
3367 	short		fsnan_out_x - tbl_snan # extended precision
3368 	short		tbl_snan    - tbl_snan # packed needs no help
3369 	short		fsnan_out_w - tbl_snan # word integer
3370 	short		fsnan_out_d - tbl_snan # double precision
3371 	short		fsnan_out_b - tbl_snan # byte integer
3372 	short		tbl_snan    - tbl_snan # packed needs no help
3373 
3374 fsnan_out_b:				# byte destination; d1 = <ea> mode,reg
3375 	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3376 	bset		&6,%d0			# set SNAN bit
3377 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3378 	ble.b		fsnan_out_b_dn		# yes
3379 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3380 	bsr.l		_dmem_write_byte	# write the default result
3381 
3382 	tst.l		%d1			# did dstore fail?
3383 	bne.l		facc_out_b		# yes
3384 
3385 	bra.w		fsnan_exit
3386 fsnan_out_b_dn:
3387 	andi.w		&0x0007,%d1		# keep the register number
3388 	bsr.l		store_dreg_b		# store result to regfile
3389 	bra.w		fsnan_exit
3390 
3391 fsnan_out_w:				# word destination; d1 = <ea> mode,reg
3392 	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3393 	bset		&14,%d0			# set SNAN bit
3394 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3395 	ble.b		fsnan_out_w_dn		# yes
3396 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3397 	bsr.l		_dmem_write_word	# write the default result
3398 
3399 	tst.l		%d1			# did dstore fail?
3400 	bne.l		facc_out_w		# yes
3401 
3402 	bra.w		fsnan_exit
3403 fsnan_out_w_dn:
3404 	andi.w		&0x0007,%d1		# keep the register number
3405 	bsr.l		store_dreg_w		# store result to regfile
3406 	bra.w		fsnan_exit
3407 
3408 fsnan_out_l:				# long word destination; d1 = <ea> mode,reg
3409 	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
3410 	bset		&30,%d0			# set SNAN bit
3411 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3412 	ble.b		fsnan_out_l_dn		# yes
3413 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3414 	bsr.l		_dmem_write_long	# write the default result
3415 
3416 	tst.l		%d1			# did dstore fail?
3417 	bne.l		facc_out_l		# yes
3418 
3419 	bra.w		fsnan_exit
3420 fsnan_out_l_dn:
3421 	andi.w		&0x0007,%d1		# keep the register number
3422 	bsr.l		store_dreg_l		# store result to regfile
3423 	bra.w		fsnan_exit
3424 
# fsnan_out_s: store the default SNAN result for a single-precision fmove out.
#   In:  d1.b = <ea> mode,reg byte of the opword (loaded by the dispatcher)
#   The single-precision image is assembled in d0 as
#       (sign of FP_SRC_EX) | 0x7fc00000 | (FP_SRC_HI >> 8)
#   Memory destinations are written at EXC_EA with _dmem_write_long; a
#   non-zero d1 afterwards diverts to facc_out_l. Data register destinations
#   use the fsnan_out_d_dn path below and store_dreg_l.
#   NOTE: despite its name, fsnan_out_d_dn is the *single-precision* data
#   register path; it is reached from fsnan_out_s, not from fsnan_out_d.
3425 fsnan_out_s:
3426 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3427 	ble.b		fsnan_out_d_dn		# yes
3428 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3429 	andi.l		&0x80000000,%d0		# keep sign
3430 	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3431 	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3432 	lsr.l		&0x8,%d1		# shift mantissa for sgl
3433 	or.l		%d1,%d0			# create sgl SNAN
3434 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3435 	bsr.l		_dmem_write_long	# write the default result
3436 
3437 	tst.l		%d1			# did dstore fail?
3438 	bne.l		facc_out_l		# yes
3439 
3440 	bra.w		fsnan_exit
3441 fsnan_out_d_dn:
3442 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3443 	andi.l		&0x80000000,%d0		# keep sign
3444 	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3445 	mov.l		%d1,-(%sp)		# save mode,reg; d1 is needed as scratch
3446 	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3447 	lsr.l		&0x8,%d1		# shift mantissa for sgl
3448 	or.l		%d1,%d0			# create sgl SNAN
3449 	mov.l		(%sp)+,%d1		# recover mode,reg
3450 	andi.w		&0x0007,%d1		# keep only the register number
3451 	bsr.l		store_dreg_l		# store result to regfile
3452 	bra.w		fsnan_exit
3453 
# fsnan_out_d: store the default SNAN result for a double-precision fmove out.
#   The 8-byte image is built in the FP_SCR0 scratch area and then written
#   to memory at EXC_EA with _dmem_write (a0 = source ptr, a1 = destination,
#   d0 = byte count). There is no data register path in this routine.
#     first longword  = sign | 0x7ff80000 | (FP_SRC_HI >> 11)
#     second longword = (low 11 bits of FP_SRC_HI moved to the top 11 bits)
#                       | (FP_SRC_LO >> 11)
#   A non-zero d1 after the write diverts to facc_out_d; otherwise exit is
#   through fsnan_exit.
#   NOTE(review): this relies on FP_SCR0_EX and FP_SCR0_HI being the first
#   two consecutive longwords of FP_SCR0 - confirm against the equates.
3454 fsnan_out_d:
3455 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3456 	andi.l		&0x80000000,%d0		# keep sign
3457 	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
3458 	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3459 	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
3460 	mov.l		&11,%d0			# load shift amt
3461 	lsr.l		%d0,%d1			# hi mantissa >> 11
3462 	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
3463 	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3464 	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
3465 	ror.l		%d0,%d1			# rotate them into bits 31-21
3466 	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
3467 	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
3468 	lsr.l		%d0,%d1			# lo mantissa >> 11
3469 	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
3470 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3471 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
3472 	movq.l		&0x8,%d0		# pass: size of 8 bytes
3473 	bsr.l		_dmem_write		# write the default result
3474 
3475 	tst.l		%d1			# did dstore fail?
3476 	bne.l		facc_out_d		# yes
3477 
3478 	bra.w		fsnan_exit
3479 
3480 # for extended precision, if the addressing mode is pre-decrement or
3481 # post-increment, then the address register did not get updated.
3482 # in addition, for pre-decrement, the stacked <ea> is incorrect.
#
# fsnan_out_x: store the default SNAN result for an extended fmove out.
#   The 12-byte result is built in FP_SCR0: the exponent word copied from
#   the source, a cleared second word, the upper mantissa longword with
#   bit 30 set, and the lower mantissa longword unchanged.
#   _calc_ea_fout is then called to obtain the corrected <ea> (returned in
#   a0) and to update the address register image; the result is copied to
#   a1 and back into EXC_EA before _dmem_write is called with a0 = FP_SCR0,
#   a1 = destination and d0 = 0xc.
#   User mode:       the user stack pointer is loaded into EXC_A7 around
#                    the call and written back to %usp afterwards.
#   Supervisor mode: if _calc_ea_fout reports mda7_flg in SPCOND_FLG (the
#                    destination was -(a7)), the result is not written with
#                    _dmem_write. The saved state is restored, three
#                    longwords of the exception frame are copied 0xc bytes
#                    lower, the 12-byte result is placed in the locations
#                    they came from, and control goes to _real_snan.
#   In both modes the saved a6 at (a6) is mirrored into EXC_A6 before the
#   call and copied back afterwards, so an update of a6 by _calc_ea_fout
#   reaches the caller's frame pointer.
3483 fsnan_out_x:
3484 	clr.b		SPCOND_FLG(%a6)		# clear special case flag
3485 
3486 	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3487 	clr.w		2+FP_SCR0(%a6)
3488 	mov.l		FP_SRC_HI(%a6),%d0
3489 	bset		&30,%d0			# set SNAN bit
3490 	mov.l		%d0,FP_SCR0_HI(%a6)
3491 	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3492 
3493 	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
3494 	bne.b		fsnan_out_x_s		# yes
3495 
3496 	mov.l		%usp,%a0		# fetch user stack pointer
3497 	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
3498 	mov.l		(%a6),EXC_A6(%a6)
3499 
3500 	bsr.l		_calc_ea_fout		# find the correct ea,update An
3501 	mov.l		%a0,%a1			# pass: dst addr for _dmem_write
3502 	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3503 
3504 	mov.l		EXC_A7(%a6),%a0
3505 	mov.l		%a0,%usp		# restore user stack pointer
3506 	mov.l		EXC_A6(%a6),(%a6)
3507 
3508 fsnan_out_x_save:
3509 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3510 	movq.l		&0xc,%d0		# pass: size of extended
3511 	bsr.l		_dmem_write		# write the default result
3512 
3513 	tst.l		%d1			# did dstore fail?
3514 	bne.l		facc_out_x		# yes
3515 
3516 	bra.w		fsnan_exit
3517 
3518 fsnan_out_x_s:
3519 	mov.l		(%a6),EXC_A6(%a6)
3520 
3521 	bsr.l		_calc_ea_fout		# find the correct ea,update An
3522 	mov.l		%a0,%a1			# pass: dst addr for _dmem_write
3523 	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3524 
3525 	mov.l		EXC_A6(%a6),(%a6)
3526 
3527 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528 	bne.b		fsnan_out_x_save	# no
3529 
3530 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3532 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3534 
3535 	frestore	FP_SRC(%a6)
3536 
3537 	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
3538 
# a6 was reloaded by hand (no unlk), so sp still points at the bottom of the
# local area and everything below is addressed as LOCAL_SIZE+offset(%sp).
# first move three longwords of the exception frame down by 0xc bytes...
3539 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3540 	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3541 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3542 
# ...then drop the 12-byte extended result into the space they occupied.
3543 	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3544 	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3545 	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3546 
3547 	add.l		&LOCAL_SIZE-0x8,%sp	# discard locals; sp -> relocated frame
3548 
3549 	bra.l		_real_snan
3550 
3551 #########################################################################
3552 # XDEF ****************************************************************	#
3553 #	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3554 #									#
3555 #	This handler should be the first code executed upon taking the	#
3556 #	FP Inexact exception in an operating system.			#
3557 #									#
3558 # XREF ****************************************************************	#
3559 #	_imem_read_long() - read instruction longword			#
3560 #	fix_skewed_ops() - adjust src operand in fsave frame		#
3561 #	set_tag_x() - determine optype of src/dst operands		#
3562 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3563 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3564 #	load_fpn2() - load dst operand from FP regfile			#
3565 #	smovcr() - emulate an "fmovcr" instruction			#
3566 #	fout() - emulate an opclass 3 instruction			#
3567 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
3568 #	_real_inex() - "callout" to operating system inexact handler	#
3569 #									#
3570 # INPUT ***************************************************************	#
3571 #	- The system stack contains the FP Inexact exception frame	#
3572 #	- The fsave frame contains the source operand			#
3573 #									#
3574 # OUTPUT **************************************************************	#
3575 #	- The system stack is unchanged					#
3576 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
3577 #									#
3578 # ALGORITHM ***********************************************************	#
3579 #	In a system where the FP Inexact exception is enabled, the goal	#
3580 # is to get to the handler specified at _real_inex(). But, on the 060,	#
3581 # for opclass zero and two instructions taking this exception, the	#
3582 # hardware doesn't store the correct result to the destination FP	#
3583 # register as did the '040 and '881/2. This handler must emulate the	#
3584 # instruction in order to get this value and then store it to the	#
3585 # correct register before calling _real_inex().				#
3586 #	For opclass 3 instructions, the 060 doesn't store the default	#
3587 # inexact result out to memory or data register file as it should.	#
3588 # This code must emulate the move out by calling fout() before finally	#
3589 # exiting through _real_inex().						#
3590 #									#
3591 #########################################################################
3592 
3593 	global		_fpsp_inex
3594 _fpsp_inex:
# Flow of this handler:
#   1. build a local frame; save the fsave frame, d0-d1/a0-a1, the FP
#      control registers and fp0-fp1.
#   2. fetch the faulting instruction longword (address taken from the
#      saved FPIAR) into EXC_OPWORD.
#   3. bit 13 set   -> finex_out (fmove out; default result stored by fout).
#      bit 13 clear -> emulate the opclass 0/2 instruction through smovcr
#                      (fmovecr) or the tbl_unsupp routine selected by the
#                      low 7 bits of the command word, then store fp0 to
#                      the destination FP register with store_fpreg.
#   4. restore everything and leave through _real_inex.
3595 
3596 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3597 
3598 	fsave		FP_SRC(%a6)		# grab the "busy" frame
3599 
3600 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3601 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3603 
3604 # the FPIAR holds the "current PC" of the faulting instruction
3605 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3606 
3607 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3608 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3609 	bsr.l		_imem_read_long		# fetch the instruction words
3610 	mov.l		%d0,EXC_OPWORD(%a6)
3611 
3612 ##############################################################################
3613 
3614 	btst		&13,%d0			# is instr an fmove out?
3615 	bne.w		finex_out		# fmove out
3616 
3617 
3618 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619 # longword integer directly into the upper longword of the mantissa along
3620 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3621 	bfextu		%d0{&19:&3},%d0		# fetch instr size
3622 	bne.b		finex_cont		# instr size is not long
3623 	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
3624 	bne.b		finex_cont		# no
3625 	fmov.l		&0x0,%fpcr
3626 	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
3627 	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
# NOTE(review): the meaning of 0xe001 in the second word of the frame is not
# visible in this part of the file - confirm against the fsave frame layout.
3628 	mov.w		&0xe001,0x2+FP_SRC(%a6)
3629 
3630 finex_cont:
3631 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3632 	bsr.l		fix_skewed_ops		# fix src op
3633 
3634 # Here, we zero the ccode and exception byte field since we're going to
3635 # emulate the whole instruction. Notice, though, that we don't kill the
3636 # INEX1 bit. This is because a packed op has long since been converted
3637 # to extended before arriving here. Therefore, we need to retain the
3638 # INEX1 bit from when the operand was first converted.
3639 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3640 
3641 	fmov.l		&0x0,%fpcr		# zero current control regs
3642 	fmov.l		&0x0,%fpsr
3643 
3644 	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645 	cmpi.b		%d1,&0x17		# is op an fmovecr?
3646 	beq.w		finex_fmovcr		# yes
3647 
3648 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3649 	bsr.l		set_tag_x		# tag the operand type
3650 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
3651 
3652 # bits four and five of the fp extension word separate the monadic and dyadic
3653 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654 # will never take this exception, but fsincos will.
3655 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
3656 	beq.b		finex_extract		# monadic
3657 
3658 	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
3659 	bne.b		finex_extract		# yes
3660 
3661 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662 	bsr.l		load_fpn2		# load dst into FP_DST
3663 
3664 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
3665 	bsr.l		set_tag_x		# tag the operand type
3666 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
3667 	bne.b		finex_op2_done		# no
3668 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
3669 finex_op2_done:
3670 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
3671 
# set up the emulation routine's arguments: d0 = rnd prec/mode, a0 = ptr to
# src operand, a1 = ptr to dst operand. d1 is used to look the routine up.
3672 finex_extract:
3673 	clr.l		%d0
3674 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
3675 
3676 	mov.b		1+EXC_CMDREG(%a6),%d1
3677 	andi.w		&0x007f,%d1		# extract extension
3678 
3679 	lea		FP_SRC(%a6),%a0
3680 	lea		FP_DST(%a6),%a1
3681 
3682 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# call it (offset is table-relative)
3684 
3685 # the operation has been emulated. the result is in fp0.
3686 finex_save:
3687 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0	# pass: dst FP reg number
3688 	bsr.l		store_fpreg
3689 
3690 finex_exit:
3691 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3692 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3694 
3695 	frestore	FP_SRC(%a6)
3696 
3697 	unlk		%a6
3698 	bra.l		_real_inex
3699 
3700 finex_fmovcr:
3701 	clr.l		%d0
3702 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3703 	mov.b		1+EXC_CMDREG(%a6),%d1
3704 	andi.l		&0x0000007f,%d1		# pass rom offset
3705 	bsr.l		smovcr
3706 	bra.b		finex_save
3707 
3708 ########################################################################
3709 
3710 #
3711 # the hardware does not save the default result to memory on enabled
3712 # inexact exceptions. we do this here before passing control to
3713 # the user inexact handler.
3714 #
3715 # byte, word, and long destination format operations can pass
3716 # through here. so can double and single precision.
3717 # although packed opclass three operations can take inexact
3718 # exceptions, they won't pass through here since they are caught
3719 # first by the unsupported data format exception handler. that handler
3720 # sends them directly to _real_inex() if necessary.
3721 #
# finex_out: opclass 3 (fmove out) path of _fpsp_inex.
#   Tags the source as NORM, clears the exception byte of the saved FPSR,
#   and calls fout with d0 = rounding precision/mode and a0 = ptr to the
#   source operand. Control then rejoins the common exit at finex_exit.
3722 finex_out:
3723 
3724 	mov.b		&NORM,STAG(%a6)		# src is a NORM
3725 
3726 	clr.l		%d0
3727 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3728 
3729 	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
3730 
3731 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
3732 
3733 	bsr.l		fout			# store the default result
3734 
3735 	bra.b		finex_exit
3736 
3737 #########################################################################
3738 # XDEF ****************************************************************	#
3739 #	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3740 #									#
3741 #	This handler should be the first code executed upon taking	#
3742 #	the FP DZ exception in an operating system.			#
3743 #									#
3744 # XREF ****************************************************************	#
3745 #	_imem_read_long() - read instruction longword from memory	#
3746 #	fix_skewed_ops() - adjust fsave operand				#
3747 #	_real_dz() - "callout" exit point from FP DZ handler		#
3748 #									#
3749 # INPUT ***************************************************************	#
3750 #	- The system stack contains the FP DZ exception stack.		#
3751 #	- The fsave frame contains the source operand.			#
3752 #									#
3753 # OUTPUT **************************************************************	#
3754 #	- The system stack contains the FP DZ exception stack.		#
3755 #	- The fsave frame contains the adjusted source operand.		#
3756 #									#
3757 # ALGORITHM ***********************************************************	#
3758 #	In a system where the DZ exception is enabled, the goal is to	#
3759 # get to the handler specified at _real_dz(). But, on the 060, when the	#
3760 # exception is taken, the input operand in the fsave state frame may	#
3761 # be incorrect for some cases and need to be adjusted. So, this package	#
3762 # adjusts the operand using fix_skewed_ops() and then branches to	#
3763 # _real_dz().								#
3764 #									#
3765 #########################################################################
3766 
3767 	global		_fpsp_dz
3768 _fpsp_dz:
# Flow of this handler:
#   1. build a local frame; save the fsave frame, d0-d1/a0-a1, the FP
#      control registers and fp0-fp1.
#   2. fetch the faulting instruction longword (address taken from the
#      saved FPIAR) into EXC_OPWORD.
#   3. let fix_skewed_ops adjust the source operand inside the fsave frame.
#   4. restore everything, frestore the frame and leave via _real_dz.
3769 
3770 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3771 
3772 	fsave		FP_SRC(%a6)		# grab the "busy" frame
3773 
3774 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3775 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3777 
3778 # the FPIAR holds the "current PC" of the faulting instruction
3779 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3780 
3781 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3782 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3783 	bsr.l		_imem_read_long		# fetch the instruction words
3784 	mov.l		%d0,EXC_OPWORD(%a6)
3785 
3786 ##############################################################################
3787 
3788 
3789 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790 # this would be the case for opclass two operations with a source zero
3791 # in the sgl or dbl format.
3792 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3793 	bsr.l		fix_skewed_ops		# fix src op
3794 
3795 fdz_exit:
3796 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3797 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3799 
3800 	frestore	FP_SRC(%a6)		# reload the (possibly adjusted) frame
3801 
3802 	unlk		%a6
3803 	bra.l		_real_dz
3804 
3805 #########################################################################
3806 # XDEF ****************************************************************	#
3807 #	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
3808 #		       exception when the "reduced" version of the	#
3809 #		       FPSP is implemented that does not emulate	#
3810 #		       FP unimplemented instructions.			#
3811 #									#
3812 #	This handler should be the first code executed upon taking a	#
3813 #	"Line F Emulator" exception in an operating system integrating	#
3814 #	the reduced version of 060FPSP.					#
3815 #									#
3816 # XREF ****************************************************************	#
3817 #	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
3818 #	_real_fline() - Handle all other cases (treated equally)	#
3819 #									#
3820 # INPUT ***************************************************************	#
3821 #	- The system stack contains a "Line F Emulator" exception	#
3822 #	  stack frame.							#
3823 #									#
3824 # OUTPUT **************************************************************	#
3825 #	- The system stack is unchanged.				#
3826 #									#
3827 # ALGORITHM ***********************************************************	#
3828 #	When a "Line F Emulator" exception occurs in a system where	#
3829 # "FPU Unimplemented" instructions will not be emulated, the exception	#
3830 # can occur because the FPU is disabled or the instruction is to be	#
3831 # classified as "Line F". This module determines which case exists and	#
3832 # calls the appropriate "callout".					#
3833 #									#
3834 #########################################################################
3835 
3836 	global		_fpsp_fline
3837 _fpsp_fline:
3838 
3839 # the word at 0x6(%sp) tells the two cases apart: 0x402c is the
3840 # "FPU disabled" case, anything else is treated as a plain Line F.
3841 	cmpi.w		0x6(%sp),&0x402c	# FPU-disabled frame word?
3842 	bne.l		_real_fline		# no; hand over as Line F
3843 
3844 	bra.l		_real_fpu_disabled	# yes; OS "FPU disabled" entry
3845 
3846 #########################################################################
3847 # XDEF ****************************************************************	#
3848 #	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
3849 #									#
3850 # XREF ****************************************************************	#
3851 #	inc_areg() - increment an address register			#
3852 #	dec_areg() - decrement an address register			#
3853 #									#
3854 # INPUT ***************************************************************	#
3855 #	d0 = number of bytes to adjust <ea> by				#
3856 #									#
3857 # OUTPUT **************************************************************	#
3858 #	a0 = <ea> to use for the operand				#
3859 #									#
3860 # ALGORITHM ***********************************************************	#
3861 # "Dummy" CALCulate Effective Address:					#
3862 #	The stacked <ea> for FP unimplemented instructions and opclass	#
3863 #	two packed instructions is correct with the exception of...	#
3864 #									#
3865 #	1) -(An)   : The register is not updated regardless of size.	#
3866 #		     Also, for extended precision and packed, the	#
3867 #		     stacked <ea> value is 8 bytes too big		#
3868 #	2) (An)+   : The register is not updated.			#
3869 #	3) #<data> : The upper longword of the immediate operand is	#
3870 #		     stacked b,w,l and s sizes are completely stacked.	#
3871 #		     d,x, and p are not.				#
3872 #									#
3873 #########################################################################
3874 
3875 	global		_dcalc_ea
3876 _dcalc_ea:
# In:   d0 = number of bytes to adjust the address register by
#       low byte of EXC_OPWORD = <ea> mode,reg; EXC_EA = stacked <ea>
# Out:  a0 = <ea> to use for the operand
#         (An)+   : inc_areg called with d0 = byte count; a0 = stacked <ea>
#         -(An)   : dec_areg called with d0 = byte count; a0 = stacked <ea>,
#                   or stacked <ea> - 8 (also written back to EXC_EA) when
#                   the byte count is 0xc
#         #<data> : SPCOND_FLG = immed_flg; a0 = saved FPIAR value + 4
#         other   : a0 = stacked <ea>
# d1 holds the register field when inc_areg/dec_areg are called (presumably
# the register selector those helpers expect - confirm against them).
3877 	mov.l		%d0, %a0		# move # bytes to %a0
3878 
3879 	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
3880 	mov.l		%d0, %d1		# make a copy
3881 
3882 	andi.w		&0x38, %d0		# extract mode field
3883 	andi.l		&0x7, %d1		# extract reg  field
3884 
3885 	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3886 	beq.b		dcea_pi			# yes
3887 
3888 	cmpi.b		%d0,&0x20		# is mode -(An) ?
3889 	beq.b		dcea_pd			# yes
3890 
3891 	or.w		%d1,%d0			# concat mode,reg
3892 	cmpi.b		%d0,&0x3c		# is mode #<data>?
3893 
3894 	beq.b		dcea_imm		# yes
3895 
3896 	mov.l		EXC_EA(%a6),%a0		# return <ea>
3897 	rts
3898 
3899 # need to set immediate data flag here since we'll need to do
3900 # an imem_read to fetch this later.
3901 dcea_imm:
3902 	mov.b		&immed_flg,SPCOND_FLG(%a6)
3903 	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> = saved FPIAR + 4
3904 	rts
3905 
3906 # here, the <ea> is stacked correctly. however, we must update the
3907 # address register...
3908 dcea_pi:
3909 	mov.l		%a0,%d0			# pass amt to inc by
3910 	bsr.l		inc_areg		# inc addr register
3911 
3912 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3913 	rts
3914 
3915 # the <ea> is stacked correctly for all but extended and packed which
3916 # the <ea>s are 8 bytes too large.
3917 # it would make no sense to have a pre-decrement to a7 in supervisor
3918 # mode so we don't even worry about this tricky case here : )
3919 dcea_pd:
3920 	mov.l		%a0,%d0			# pass amt to dec by
3921 	bsr.l		dec_areg		# dec addr register
3922 
3923 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3924 
# NOTE(review): d0 is tested after the call, so this relies on dec_areg
# leaving the byte count in d0 intact - confirm against dec_areg.
3925 	cmpi.b		%d0,&0xc		# is opsize ext or packed?
3926 	beq.b		dcea_pd2		# yes
3927 	rts
3928 dcea_pd2:
3929 	sub.l		&0x8,%a0		# correct <ea>
3930 	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
3931 	rts
3932 
3933 #########################################################################
3934 # XDEF ****************************************************************	#
3935 #	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
3936 #			 and packed data opclass 3 operations.		#
3937 #									#
3938 # XREF ****************************************************************	#
3939 #	None								#
3940 #									#
3941 # INPUT ***************************************************************	#
3942 #	None								#
3943 #									#
3944 # OUTPUT **************************************************************	#
3945 #	a0 = return correct effective address				#
3946 #									#
3947 # ALGORITHM ***********************************************************	#
3948 #	For opclass 3 extended and packed data operations, the <ea>	#
3949 # stacked for the exception is incorrect for -(an) and (an)+ addressing	#
3950 # modes. Also, while we're at it, the index register itself must get	#
3951 # updated.								#
3952 #	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
3953 # and return that value as the correct <ea> and store that value in An.	#
3954 # For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
3955 #									#
3956 #########################################################################
3957 
3958 # This calc_ea is currently used to retrieve the correct <ea>
3959 # for fmove outs of type extended and packed.
3960 	global		_calc_ea_fout
3961 _calc_ea_fout:
# In:   low byte of EXC_OPWORD = <ea> mode,reg; EXC_EA = stacked <ea>
# Out:  a0 = <ea> to use
#         (An)+ : a0 = stacked <ea>; An += 12
#         -(An) : a0 = stacked <ea> - 8; EXC_EA -= 8; An = a0
#         other : a0 = stacked <ea>
#       SPCOND_FLG = mia7_flg for (a7)+, mda7_flg for -(a7)
# Where "An" lives while the handler runs:
#   a0,a1 : saved images at EXC_DREGS+0x8 / EXC_DREGS+0xc
#   a2-a5 : the live registers themselves
#   a6    : image in EXC_A6
#   a7    : image in EXC_A7
# d0 and d1 are used as scratch.
3962 	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
3963 	mov.l		%d0,%d1			# make a copy
3964 
3965 	andi.w		&0x38,%d0		# extract mode field
3966 	andi.l		&0x7,%d1		# extract reg  field
3967 
3968 	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3969 	beq.b		ceaf_pi			# yes
3970 
3971 	cmpi.b		%d0,&0x20		# is mode -(An) ?
3972 	beq.w		ceaf_pd			# yes
3973 
3974 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3975 	rts
3976 
3977 # (An)+ : extended and packed fmove out
3978 #	: stacked <ea> is correct
3979 #	: "An" not updated
3980 ceaf_pi:
3981 	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1	# offset of per-register stub
3982 	mov.l		EXC_EA(%a6),%a0			# return value: stacked <ea>
3983 	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)	# stub adds 12 to An and returns
3984 
3985 	swbeg		&0x8
3986 tbl_ceaf_pi:
3987 	short		ceaf_pi0 - tbl_ceaf_pi
3988 	short		ceaf_pi1 - tbl_ceaf_pi
3989 	short		ceaf_pi2 - tbl_ceaf_pi
3990 	short		ceaf_pi3 - tbl_ceaf_pi
3991 	short		ceaf_pi4 - tbl_ceaf_pi
3992 	short		ceaf_pi5 - tbl_ceaf_pi
3993 	short		ceaf_pi6 - tbl_ceaf_pi
3994 	short		ceaf_pi7 - tbl_ceaf_pi
3995 
3996 ceaf_pi0:
3997 	addi.l		&0xc,EXC_DREGS+0x8(%a6)	# saved a0 += 12
3998 	rts
3999 ceaf_pi1:
4000 	addi.l		&0xc,EXC_DREGS+0xc(%a6)	# saved a1 += 12
4001 	rts
4002 ceaf_pi2:
4003 	add.l		&0xc,%a2
4004 	rts
4005 ceaf_pi3:
4006 	add.l		&0xc,%a3
4007 	rts
4008 ceaf_pi4:
4009 	add.l		&0xc,%a4
4010 	rts
4011 ceaf_pi5:
4012 	add.l		&0xc,%a5
4013 	rts
4014 ceaf_pi6:
4015 	addi.l		&0xc,EXC_A6(%a6)	# a6 image += 12
4016 	rts
4017 ceaf_pi7:
4018 	mov.b		&mia7_flg,SPCOND_FLG(%a6)	# flag the (a7)+ case
4019 	addi.l		&0xc,EXC_A7(%a6)	# a7 image += 12
4020 	rts
4021 
4022 # -(An) : extended and packed fmove out
4023 #	: stacked <ea> = actual <ea> + 8
4024 #	: "An" not updated
4025 ceaf_pd:
4026 	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1	# offset of per-register stub
4027 	mov.l		EXC_EA(%a6),%a0
4028 	sub.l		&0x8,%a0			# return value: corrected <ea>
4029 	sub.l		&0x8,EXC_EA(%a6)		# correct the stacked copy too
4030 	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)	# stub sets An = a0 and returns
4031 
4032 	swbeg		&0x8
4033 tbl_ceaf_pd:
4034 	short		ceaf_pd0 - tbl_ceaf_pd
4035 	short		ceaf_pd1 - tbl_ceaf_pd
4036 	short		ceaf_pd2 - tbl_ceaf_pd
4037 	short		ceaf_pd3 - tbl_ceaf_pd
4038 	short		ceaf_pd4 - tbl_ceaf_pd
4039 	short		ceaf_pd5 - tbl_ceaf_pd
4040 	short		ceaf_pd6 - tbl_ceaf_pd
4041 	short		ceaf_pd7 - tbl_ceaf_pd
4042 
4043 ceaf_pd0:
4044 	mov.l		%a0,EXC_DREGS+0x8(%a6)	# saved a0 = corrected <ea>
4045 	rts
4046 ceaf_pd1:
4047 	mov.l		%a0,EXC_DREGS+0xc(%a6)	# saved a1 = corrected <ea>
4048 	rts
4049 ceaf_pd2:
4050 	mov.l		%a0,%a2
4051 	rts
4052 ceaf_pd3:
4053 	mov.l		%a0,%a3
4054 	rts
4055 ceaf_pd4:
4056 	mov.l		%a0,%a4
4057 	rts
4058 ceaf_pd5:
4059 	mov.l		%a0,%a5
4060 	rts
4061 ceaf_pd6:
4062 	mov.l		%a0,EXC_A6(%a6)		# a6 image = corrected <ea>
4063 	rts
4064 ceaf_pd7:
4065 	mov.l		%a0,EXC_A7(%a6)		# a7 image = corrected <ea>
4066 	mov.b		&mda7_flg,SPCOND_FLG(%a6)	# flag the -(a7) case
4067 	rts
4068 
4069 #
4070 # This table holds the offsets of the emulation routines for each individual
4071 # math operation relative to the address of this table. Included are
4072 # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073 # this table is for the version of the 060FPSP without transcendentals.
4074 # The location within the table is determined by the extension bits of the
4075 # operation longword.
4076 #
4077 
# Layout: 109 longword entries (indices 0x00-0x6c), each the offset of an
# emulation routine from tbl_unsupp. An entry of "tbl_unsupp - tbl_unsupp"
# is a zero offset, i.e. no routine is provided for that extension here.
# NOTE(review): finex_extract indexes this table with the low 7 bits of the
# command word (0x00-0x7f), which can exceed the last entry (0x6c); confirm
# that extensions 0x6d-0x7f cannot reach that lookup.
4078 	swbeg		&109
4079 tbl_unsupp:
4080 	long		fin		- tbl_unsupp	# 00: fmove
4081 	long		fint		- tbl_unsupp	# 01: fint
4082 	long		tbl_unsupp	- tbl_unsupp	# 02: fsinh
4083 	long		fintrz		- tbl_unsupp	# 03: fintrz
4084 	long		fsqrt		- tbl_unsupp	# 04: fsqrt
4085 	long		tbl_unsupp	- tbl_unsupp
4086 	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
4087 	long		tbl_unsupp	- tbl_unsupp
4088 	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
4089 	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
4090 	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
4091 	long		tbl_unsupp	- tbl_unsupp
4092 	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
4093 	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
4094 	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
4095 	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
4096 	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
4097 	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
4098 	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
4099 	long		tbl_unsupp	- tbl_unsupp
4100 	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
4101 	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
4102 	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
4103 	long		tbl_unsupp	- tbl_unsupp
4104 	long		fabs		- tbl_unsupp	# 18: fabs
4105 	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
4106 	long		fneg		- tbl_unsupp	# 1a: fneg
4107 	long		tbl_unsupp	- tbl_unsupp
4108 	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
4109 	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
4110 	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
4111 	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
4112 	long		fdiv		- tbl_unsupp	# 20: fdiv
4113 	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
4114 	long		fadd		- tbl_unsupp	# 22: fadd
4115 	long		fmul		- tbl_unsupp	# 23: fmul
4116 	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
4117 	long		tbl_unsupp	- tbl_unsupp	# 25: frem
4118 	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
4119 	long		fsglmul		- tbl_unsupp	# 27: fsglmul
4120 	long		fsub		- tbl_unsupp	# 28: fsub
4121 	long		tbl_unsupp	- tbl_unsupp
4122 	long		tbl_unsupp	- tbl_unsupp
4123 	long		tbl_unsupp	- tbl_unsupp
4124 	long		tbl_unsupp	- tbl_unsupp
4125 	long		tbl_unsupp	- tbl_unsupp
4126 	long		tbl_unsupp	- tbl_unsupp
4127 	long		tbl_unsupp	- tbl_unsupp
4128 	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
4129 	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
4130 	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
4131 	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
4132 	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
4133 	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
4134 	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
4135 	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
4136 	long		fcmp		- tbl_unsupp	# 38: fcmp
4137 	long		tbl_unsupp	- tbl_unsupp
4138 	long		ftst		- tbl_unsupp	# 3a: ftst
4139 	long		tbl_unsupp	- tbl_unsupp
4140 	long		tbl_unsupp	- tbl_unsupp
4141 	long		tbl_unsupp	- tbl_unsupp
4142 	long		tbl_unsupp	- tbl_unsupp
4143 	long		tbl_unsupp	- tbl_unsupp
4144 	long		fsin		- tbl_unsupp	# 40: fsmove
4145 	long		fssqrt		- tbl_unsupp	# 41: fssqrt
4146 	long		tbl_unsupp	- tbl_unsupp
4147 	long		tbl_unsupp	- tbl_unsupp
4148 	long		fdin		- tbl_unsupp	# 44: fdmove
4149 	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
4150 	long		tbl_unsupp	- tbl_unsupp
4151 	long		tbl_unsupp	- tbl_unsupp
4152 	long		tbl_unsupp	- tbl_unsupp
4153 	long		tbl_unsupp	- tbl_unsupp
4154 	long		tbl_unsupp	- tbl_unsupp
4155 	long		tbl_unsupp	- tbl_unsupp
4156 	long		tbl_unsupp	- tbl_unsupp
4157 	long		tbl_unsupp	- tbl_unsupp
4158 	long		tbl_unsupp	- tbl_unsupp
4159 	long		tbl_unsupp	- tbl_unsupp
4160 	long		tbl_unsupp	- tbl_unsupp
4161 	long		tbl_unsupp	- tbl_unsupp
4162 	long		tbl_unsupp	- tbl_unsupp
4163 	long		tbl_unsupp	- tbl_unsupp
4164 	long		tbl_unsupp	- tbl_unsupp
4165 	long		tbl_unsupp	- tbl_unsupp
4166 	long		tbl_unsupp	- tbl_unsupp
4167 	long		tbl_unsupp	- tbl_unsupp
4168 	long		fsabs		- tbl_unsupp	# 58: fsabs
4169 	long		tbl_unsupp	- tbl_unsupp
4170 	long		fsneg		- tbl_unsupp	# 5a: fsneg
4171 	long		tbl_unsupp	- tbl_unsupp
4172 	long		fdabs		- tbl_unsupp	# 5c: fdabs
4173 	long		tbl_unsupp	- tbl_unsupp
4174 	long		fdneg		- tbl_unsupp	# 5e: fdneg
4175 	long		tbl_unsupp	- tbl_unsupp
4176 	long		fsdiv		- tbl_unsupp	# 60: fsdiv
4177 	long		tbl_unsupp	- tbl_unsupp
4178 	long		fsadd		- tbl_unsupp	# 62: fsadd
4179 	long		fsmul		- tbl_unsupp	# 63: fsmul
4180 	long		fddiv		- tbl_unsupp	# 64: fddiv
4181 	long		tbl_unsupp	- tbl_unsupp
4182 	long		fdadd		- tbl_unsupp	# 66: fdadd
4183 	long		fdmul		- tbl_unsupp	# 67: fdmul
4184 	long		fssub		- tbl_unsupp	# 68: fssub
4185 	long		tbl_unsupp	- tbl_unsupp
4186 	long		tbl_unsupp	- tbl_unsupp
4187 	long		tbl_unsupp	- tbl_unsupp
4188 	long		fdsub		- tbl_unsupp	# 6c: fdsub
4189 
4190 #################################################
4191 # Add this here so non-fp modules can compile.
4192 # (smovcr is called from fpsp_inex.)
4193 	global		smovcr
# placeholder only: the body is a branch to itself, so execution that
# reaches this label never returns to the caller.
4194 smovcr:
4195 	bra.b		smovcr
4196 
4197 #########################################################################
4198 # XDEF ****************************************************************	#
4199 #	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
4200 #									#
4201 # XREF ****************************************************************	#
4202 #	fetch_dreg() - fetch data register				#
4203 #	{i,d,}mem_read() - fetch data from memory			#
4204 #	_mem_write() - write data to memory				#
4205 #	iea_iacc() - instruction memory access error occurred		#
4206 #	iea_dacc() - data memory access error occurred			#
4207 #	restore() - restore An index regs if access error occurred	#
4208 #									#
4209 # INPUT ***************************************************************	#
4210 #	None								#
4211 #									#
4212 # OUTPUT **************************************************************	#
4213 #	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
4214 #		d0 = size of dump					#
4215 #		d1 = Dn							#
4216 #	Else if instruction access error,				#
4217 #		d0 = FSLW						#
4218 #	Else if data access error,					#
4219 #		d0 = FSLW						#
4220 #		a0 = address of fault					#
4221 #	Else								#
4222 #		none.							#
4223 #									#
4224 # ALGORITHM ***********************************************************	#
4225 #	The effective address must be calculated since this is entered	#
4226 # from an "Unimplemented Effective Address" exception handler. So, we	#
4227 # have our own fcalc_ea() routine here. If an access error is flagged	#
4228 # by a _{i,d,}mem_read() call, we must exit through the special		#
4229 # handler.								#
4230 #	The data register is determined and its value loaded to get the	#
4231 # string of FP registers affected. This value is used as an index into	#
4232 # a lookup table such that we can determine the number of bytes		#
4233 # involved.								#
4234 #	If the instruction is "fmovm.x <ea>,Dn", a _dmem_read() is used	#
4235 # to read in all FP values. Again, _dmem_read() may fail and require a	#
4236 # special exit.								#
4237 #	If the instruction is "fmovm.x Dn,<ea>", _dmem_write() is used	#
4238 # to write all FP values. _dmem_write() may also fail.			#
4239 #	If the instruction is "fmovm.x Dn,-(a7)" from supervisor mode,	#
4240 # then we return the size of the dump and the string to the caller	#
4241 # so that the move can occur outside of this routine. This special	#
4242 # case is required so that moves to the system stack are handled	#
4243 # correctly.								#
4244 #									#
4245 # DYNAMIC:								#
4246 #	fmovm.x	dn, <ea>						#
4247 #	fmovm.x	<ea>, dn						#
4248 #									#
4249 #	      <WORD 1>		      <WORD2>				#
4250 #	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
4251 #									#
4252 #	& = (0): predecrement addressing mode				#
4253 #	    (1): postincrement or control addressing mode		#
4254 #	@ = (0): move listed regs from memory to the FPU		#
4255 #	    (1): move listed regs from the FPU to memory		#
4256 #	$$$    : index of data register holding reg select mask		#
4257 #									#
4258 # NOTES:								#
4259 #	If the data register holds a zero, then the			#
4260 #	instruction is a nop.						#
4261 #									#
4262 #########################################################################
4263 
4264 	global		fmovm_dynamic
4265 fmovm_dynamic:
4266 
4267 # extract the data register in which the bit string resides...
4268 	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch lo byte of extword
4269 	andi.w		&0x70,%d1		# extract reg bits
4270 	lsr.b		&0x4,%d1		# shift into lo bits
4271 
4272 # fetch the bit string into d0...
4273 	bsr.l		fetch_dreg		# fetch reg string
4274 
4275 	andi.l		&0x000000ff,%d0		# keep only lo byte
4276 
4277 	mov.l		%d0,-(%sp)		# save strg
4278 	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0	# size = 12 bytes per listed reg
4279 	mov.l		%d0,-(%sp)		# save size
4280 	bsr.l		fmovm_calc_ea		# calculate <ea> (returned in a0)
4281 	mov.l		(%sp)+,%d0		# restore size
4282 	mov.l		(%sp)+,%d1		# restore strg (sets Z for the beq)
4283 
4284 # if the bit string is a zero, then the operation is a no-op
4285 # but, make sure that we've calculated ea and advanced the opword pointer
4286 	beq.w		fmovm_data_done
4287 
4288 # separate move ins from move outs...
4289 	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
4290 	beq.w		fmovm_data_in		# bit clear: it's a move in
4291 
4292 #############
4293 # MOVE OUT: #
4294 #############
4295 fmovm_data_out:
4296 	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
4297 	bne.w		fmovm_out_ctrl		# postincrement or control
4298 
4299 ############################
4300 fmovm_out_predec:
4301 # for predecrement mode, the bit string is the opposite of both control
4302 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303 # here, we convert it to be just like the others...
4304 	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4305 
4306 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
4307 	beq.b		fmovm_out_ctrl		# user
4308 
4309 fmovm_out_predec_s:
4310 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311 	bne.b		fmovm_out_ctrl		# no: do the normal store
4312 
4313 # the operation was unfortunately an: fmovm.x dn,-(sp)
4314 # called from supervisor mode.
4315 # we're also passing "size" and "strg" back to the calling routine
4316 	rts					# d0 = size, d1 = converted strg
4317 
4318 ############################
4319 fmovm_out_ctrl:
4320 	mov.l		%a0,%a1			# move <ea> to a1
4321 
4322 	sub.l		%d0,%sp			# subtract size of dump
4323 	lea		(%sp),%a0		# a0 = start of stack dump buffer
4324 
4325 	tst.b		%d1			# should FP0 be moved?
4326 	bpl.b		fmovm_out_ctrl_fp1	# no
4327 
4328 	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes (copy from EXC_FP0 in frame)
4329 	mov.l		0x4+EXC_FP0(%a6),(%a0)+
4330 	mov.l		0x8+EXC_FP0(%a6),(%a0)+
4331 
4332 fmovm_out_ctrl_fp1:
4333 	lsl.b		&0x1,%d1		# should FP1 be moved?
4334 	bpl.b		fmovm_out_ctrl_fp2	# no
4335 
4336 	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes (copy from EXC_FP1 in frame)
4337 	mov.l		0x4+EXC_FP1(%a6),(%a0)+
4338 	mov.l		0x8+EXC_FP1(%a6),(%a0)+
4339 
4340 fmovm_out_ctrl_fp2:
4341 	lsl.b		&0x1,%d1		# should FP2 be moved?
4342 	bpl.b		fmovm_out_ctrl_fp3	# no
4343 
4344 	fmovm.x		&0x20,(%a0)		# yes
4345 	add.l		&0xc,%a0		# advance past the 12 bytes stored
4346 
4347 fmovm_out_ctrl_fp3:
4348 	lsl.b		&0x1,%d1		# should FP3 be moved?
4349 	bpl.b		fmovm_out_ctrl_fp4	# no
4350 
4351 	fmovm.x		&0x10,(%a0)		# yes
4352 	add.l		&0xc,%a0
4353 
4354 fmovm_out_ctrl_fp4:
4355 	lsl.b		&0x1,%d1		# should FP4 be moved?
4356 	bpl.b		fmovm_out_ctrl_fp5	# no
4357 
4358 	fmovm.x		&0x08,(%a0)		# yes
4359 	add.l		&0xc,%a0
4360 
4361 fmovm_out_ctrl_fp5:
4362 	lsl.b		&0x1,%d1		# should FP5 be moved?
4363 	bpl.b		fmovm_out_ctrl_fp6	# no
4364 
4365 	fmovm.x		&0x04,(%a0)		# yes
4366 	add.l		&0xc,%a0
4367 
4368 fmovm_out_ctrl_fp6:
4369 	lsl.b		&0x1,%d1		# should FP6 be moved?
4370 	bpl.b		fmovm_out_ctrl_fp7	# no
4371 
4372 	fmovm.x		&0x02,(%a0)		# yes
4373 	add.l		&0xc,%a0
4374 
4375 fmovm_out_ctrl_fp7:
4376 	lsl.b		&0x1,%d1		# should FP7 be moved?
4377 	bpl.b		fmovm_out_ctrl_done	# no
4378 
4379 	fmovm.x		&0x01,(%a0)		# yes
4380 	add.l		&0xc,%a0
4381 
4382 fmovm_out_ctrl_done:
4383 	mov.l		%a1,L_SCR1(%a6)		# keep <ea>; fmovm_err reloads it
4384 
4385 	lea		(%sp),%a0		# pass: supervisor src
4386 	mov.l		%d0,-(%sp)		# save size
4387 	bsr.l		_dmem_write		# copy data to user mem
4388 
4389 	mov.l		(%sp)+,%d0		# restore size
4390 	add.l		%d0,%sp			# clear fpreg data from stack
4391 
4392 	tst.l		%d1			# did dstore err?
4393 	bne.w		fmovm_out_err		# yes
4394 
4395 	rts
4396 
4397 ############
4398 # MOVE IN: #
4399 ############
4400 fmovm_data_in:
4401 	mov.l		%a0,L_SCR1(%a6)		# keep <ea>; fmovm_err reloads it
4402 
4403 	sub.l		%d0,%sp			# make room for fpregs
4404 	lea		(%sp),%a1		# a1 = stack buffer (presumably _dmem_read dst)
4405 
4406 	mov.l		%d1,-(%sp)		# save bit string for later
4407 	mov.l		%d0,-(%sp)		# save # of bytes
4408 
4409 	bsr.l		_dmem_read		# copy data from user mem
4410 
4411 	mov.l		(%sp)+,%d0		# retrieve # of bytes
4412 
4413 	tst.l		%d1			# did dfetch fail?
4414 	bne.w		fmovm_in_err		# yes
4415 
4416 	mov.l		(%sp)+,%d1		# load bit string
4417 
4418 	lea		(%sp),%a0		# addr of stack
4419 
4420 	tst.b		%d1			# should FP0 be moved?
4421 	bpl.b		fmovm_data_in_fp1	# no
4422 
4423 	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes (update EXC_FP0 in frame)
4424 	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
4425 	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
4426 
4427 fmovm_data_in_fp1:
4428 	lsl.b		&0x1,%d1		# should FP1 be moved?
4429 	bpl.b		fmovm_data_in_fp2	# no
4430 
4431 	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes (update EXC_FP1 in frame)
4432 	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
4433 	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
4434 
4435 fmovm_data_in_fp2:
4436 	lsl.b		&0x1,%d1		# should FP2 be moved?
4437 	bpl.b		fmovm_data_in_fp3	# no
4438 
4439 	fmovm.x		(%a0)+,&0x20		# yes
4440 
4441 fmovm_data_in_fp3:
4442 	lsl.b		&0x1,%d1		# should FP3 be moved?
4443 	bpl.b		fmovm_data_in_fp4	# no
4444 
4445 	fmovm.x		(%a0)+,&0x10		# yes
4446 
4447 fmovm_data_in_fp4:
4448 	lsl.b		&0x1,%d1		# should FP4 be moved?
4449 	bpl.b		fmovm_data_in_fp5	# no
4450 
4451 	fmovm.x		(%a0)+,&0x08		# yes
4452 
4453 fmovm_data_in_fp5:
4454 	lsl.b		&0x1,%d1		# should FP5 be moved?
4455 	bpl.b		fmovm_data_in_fp6	# no
4456 
4457 	fmovm.x		(%a0)+,&0x04		# yes
4458 
4459 fmovm_data_in_fp6:
4460 	lsl.b		&0x1,%d1		# should FP6 be moved?
4461 	bpl.b		fmovm_data_in_fp7	# no
4462 
4463 	fmovm.x		(%a0)+,&0x02		# yes
4464 
4465 fmovm_data_in_fp7:
4466 	lsl.b		&0x1,%d1		# should FP7 be moved?
4467 	bpl.b		fmovm_data_in_done	# no
4468 
4469 	fmovm.x		(%a0)+,&0x01		# yes
4470 
4471 fmovm_data_in_done:
4472 	add.l		%d0,%sp			# remove fpregs from stack
4473 	rts
4474 
4475 #####################################
4476 
4477 fmovm_data_done:
4478 	rts					# zero bit string: nothing to move
4479 
4480 ##############################################################################
4481 
4482 #
4483 # table indexed by the operation's bit string that gives the number
4484 # of bytes that will be moved.
4485 #
4486 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4487 #
4488 tbl_fmovm_size:
4489 	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24	# strg 0x00-0x07
4490 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x08-0x0f
4491 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x10-0x17
4492 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x18-0x1f
4493 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x20-0x27
4494 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x28-0x2f
4495 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x30-0x37
4496 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x38-0x3f
4497 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x40-0x47
4498 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x48-0x4f
4499 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x50-0x57
4500 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x58-0x5f
4501 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x60-0x67
4502 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x68-0x6f
4503 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x70-0x77
4504 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0x78-0x7f
4505 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x80-0x87
4506 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x88-0x8f
4507 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x90-0x97
4508 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x98-0x9f
4509 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0xa0-0xa7
4510 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xa8-0xaf
4511 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xb0-0xb7
4512 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xb8-0xbf
4513 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0xc0-0xc7
4514 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xc8-0xcf
4515 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xd0-0xd7
4516 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xd8-0xdf
4517 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xe0-0xe7
4518 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xe8-0xef
4519 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xf0-0xf7
4520 	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60	# strg 0xf8-0xff
4521 
4522 #
4523 # table to convert a pre-decrement bit string into a post-increment
4524 # or control bit string.
4525 # ex:	0x00	==>	0x00
4526 #	0x01	==>	0x80
4527 #	0x02	==>	0x40
4528 #		.
4529 #		.
4530 #	0xfd	==>	0xbf
4531 #	0xfe	==>	0x7f
4532 #	0xff	==>	0xff
4533 #
4534 tbl_fmovm_convert:
4535 	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0	# input 0x00-0x07 (bits reversed)
4536 	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0	# input 0x08-0x0f
4537 	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8	# input 0x10-0x17
4538 	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8	# input 0x18-0x1f
4539 	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4	# input 0x20-0x27
4540 	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4	# input 0x28-0x2f
4541 	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec	# input 0x30-0x37
4542 	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc	# input 0x38-0x3f
4543 	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2	# input 0x40-0x47
4544 	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2	# input 0x48-0x4f
4545 	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea	# input 0x50-0x57
4546 	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa	# input 0x58-0x5f
4547 	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6	# input 0x60-0x67
4548 	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6	# input 0x68-0x6f
4549 	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee	# input 0x70-0x77
4550 	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe	# input 0x78-0x7f
4551 	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1	# input 0x80-0x87
4552 	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1	# input 0x88-0x8f
4553 	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9	# input 0x90-0x97
4554 	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9	# input 0x98-0x9f
4555 	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5	# input 0xa0-0xa7
4556 	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5	# input 0xa8-0xaf
4557 	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed	# input 0xb0-0xb7
4558 	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd	# input 0xb8-0xbf
4559 	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3	# input 0xc0-0xc7
4560 	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3	# input 0xc8-0xcf
4561 	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb	# input 0xd0-0xd7
4562 	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb	# input 0xd8-0xdf
4563 	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7	# input 0xe0-0xe7
4564 	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7	# input 0xe8-0xef
4565 	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef	# input 0xf0-0xf7
4566 	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff	# input 0xf8-0xff
4567 
4568 	global		fmovm_calc_ea
4569 ###############################################
4570 # fmovm_calc_ea: calculate effective address  #
4571 ###############################################
4572 fmovm_calc_ea:
4573 	mov.l		%d0,%a0			# move # bytes to a0
4574 # (on entry d0 = # of bytes to move; each handler below returns <ea> in a0)
4575 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576 # easily changed if they were inputs passed in registers.
4577 	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
4578 	mov.w		%d0,%d1			# make a copy
4579 
4580 	andi.w		&0x3f,%d0		# extract {mode,reg} bits = table index
4581 	andi.l		&0x7,%d1		# extract reg  field
4582 
4583 # jump to the corresponding function for each {MODE,REG} pair.
4584 	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585 	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586 # NOTE(review): entries of offset 0 would jump to the table itself; presumably never taken
4587 	swbeg		&64
4588 tbl_fea_mode:
4589 	short		tbl_fea_mode	-	tbl_fea_mode	# index 0x00-0x07: no handler
4590 	short		tbl_fea_mode	-	tbl_fea_mode
4591 	short		tbl_fea_mode	-	tbl_fea_mode
4592 	short		tbl_fea_mode	-	tbl_fea_mode
4593 	short		tbl_fea_mode	-	tbl_fea_mode
4594 	short		tbl_fea_mode	-	tbl_fea_mode
4595 	short		tbl_fea_mode	-	tbl_fea_mode
4596 	short		tbl_fea_mode	-	tbl_fea_mode
4597 # index 0x08-0x0f: no handler
4598 	short		tbl_fea_mode	-	tbl_fea_mode
4599 	short		tbl_fea_mode	-	tbl_fea_mode
4600 	short		tbl_fea_mode	-	tbl_fea_mode
4601 	short		tbl_fea_mode	-	tbl_fea_mode
4602 	short		tbl_fea_mode	-	tbl_fea_mode
4603 	short		tbl_fea_mode	-	tbl_fea_mode
4604 	short		tbl_fea_mode	-	tbl_fea_mode
4605 	short		tbl_fea_mode	-	tbl_fea_mode
4606 # index 0x10-0x17: (An)
4607 	short		faddr_ind_a0	-	tbl_fea_mode
4608 	short		faddr_ind_a1	-	tbl_fea_mode
4609 	short		faddr_ind_a2	-	tbl_fea_mode
4610 	short		faddr_ind_a3	-	tbl_fea_mode
4611 	short		faddr_ind_a4	-	tbl_fea_mode
4612 	short		faddr_ind_a5	-	tbl_fea_mode
4613 	short		faddr_ind_a6	-	tbl_fea_mode
4614 	short		faddr_ind_a7	-	tbl_fea_mode
4615 # index 0x18-0x1f: (An)+
4616 	short		faddr_ind_p_a0	-	tbl_fea_mode
4617 	short		faddr_ind_p_a1	-	tbl_fea_mode
4618 	short		faddr_ind_p_a2	-	tbl_fea_mode
4619 	short		faddr_ind_p_a3	-	tbl_fea_mode
4620 	short		faddr_ind_p_a4	-	tbl_fea_mode
4621 	short		faddr_ind_p_a5	-	tbl_fea_mode
4622 	short		faddr_ind_p_a6	-	tbl_fea_mode
4623 	short		faddr_ind_p_a7	-	tbl_fea_mode
4624 # index 0x20-0x27: -(An)
4625 	short		faddr_ind_m_a0	-	tbl_fea_mode
4626 	short		faddr_ind_m_a1	-	tbl_fea_mode
4627 	short		faddr_ind_m_a2	-	tbl_fea_mode
4628 	short		faddr_ind_m_a3	-	tbl_fea_mode
4629 	short		faddr_ind_m_a4	-	tbl_fea_mode
4630 	short		faddr_ind_m_a5	-	tbl_fea_mode
4631 	short		faddr_ind_m_a6	-	tbl_fea_mode
4632 	short		faddr_ind_m_a7	-	tbl_fea_mode
4633 # index 0x28-0x2f: (d16, An)
4634 	short		faddr_ind_disp_a0	-	tbl_fea_mode
4635 	short		faddr_ind_disp_a1	-	tbl_fea_mode
4636 	short		faddr_ind_disp_a2	-	tbl_fea_mode
4637 	short		faddr_ind_disp_a3	-	tbl_fea_mode
4638 	short		faddr_ind_disp_a4	-	tbl_fea_mode
4639 	short		faddr_ind_disp_a5	-	tbl_fea_mode
4640 	short		faddr_ind_disp_a6	-	tbl_fea_mode
4641 	short		faddr_ind_disp_a7	-	tbl_fea_mode
4642 # index 0x30-0x37: indexed and memory indirect modes with An base
4643 	short		faddr_ind_ext	-	tbl_fea_mode
4644 	short		faddr_ind_ext	-	tbl_fea_mode
4645 	short		faddr_ind_ext	-	tbl_fea_mode
4646 	short		faddr_ind_ext	-	tbl_fea_mode
4647 	short		faddr_ind_ext	-	tbl_fea_mode
4648 	short		faddr_ind_ext	-	tbl_fea_mode
4649 	short		faddr_ind_ext	-	tbl_fea_mode
4650 	short		faddr_ind_ext	-	tbl_fea_mode
4651 # index 0x38-0x3f: absolute and PC-relative modes; last four have no handler
4652 	short		fabs_short	-	tbl_fea_mode
4653 	short		fabs_long	-	tbl_fea_mode
4654 	short		fpc_ind		-	tbl_fea_mode
4655 	short		fpc_ind_ext	-	tbl_fea_mode
4656 	short		tbl_fea_mode	-	tbl_fea_mode
4657 	short		tbl_fea_mode	-	tbl_fea_mode
4658 	short		tbl_fea_mode	-	tbl_fea_mode
4659 	short		tbl_fea_mode	-	tbl_fea_mode
4660 
4661 ###################################
4662 # Address register indirect: (An) #
4663 ###################################
4664 faddr_ind_a0:
4665 	mov.l		EXC_DREGS+0x8(%a6),%a0	# <ea> = a0 copy held at EXC_DREGS+0x8
4666 	rts
4667 
4668 faddr_ind_a1:
4669 	mov.l		EXC_DREGS+0xc(%a6),%a0	# <ea> = a1 copy held at EXC_DREGS+0xc
4670 	rts
4671 
4672 faddr_ind_a2:
4673 	mov.l		%a2,%a0			# <ea> = live a2
4674 	rts
4675 
4676 faddr_ind_a3:
4677 	mov.l		%a3,%a0			# <ea> = live a3
4678 	rts
4679 
4680 faddr_ind_a4:
4681 	mov.l		%a4,%a0			# <ea> = live a4
4682 	rts
4683 
4684 faddr_ind_a5:
4685 	mov.l		%a5,%a0			# <ea> = live a5
4686 	rts
4687 
4688 faddr_ind_a6:
4689 	mov.l		(%a6),%a0		# <ea> = a6 value stored at (a6)
4690 	rts
4691 
4692 faddr_ind_a7:
4693 	mov.l		EXC_A7(%a6),%a0		# <ea> = a7 copy held at EXC_A7
4694 	rts
4695 
4696 #####################################################
4697 # Address register indirect w/ postincrement: (An)+ #
4698 #####################################################
4699 faddr_ind_p_a0:
4700 	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4701 	mov.l		%d0,%d1			# d1 = working copy to bump
4702 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4703 	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
4704 	mov.l		%d0,%a0			# <ea> = value before the increment
4705 	rts
4706 
4707 faddr_ind_p_a1:
4708 	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4709 	mov.l		%d0,%d1			# d1 = working copy to bump
4710 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4711 	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
4712 	mov.l		%d0,%a0			# <ea> = value before the increment
4713 	rts
4714 
4715 faddr_ind_p_a2:
4716 	mov.l		%a2,%d0			# Get current a2
4717 	mov.l		%d0,%d1			# d1 = working copy to bump
4718 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4719 	mov.l		%d1,%a2			# Save incr value
4720 	mov.l		%d0,%a0			# <ea> = value before the increment
4721 	rts
4722 
4723 faddr_ind_p_a3:
4724 	mov.l		%a3,%d0			# Get current a3
4725 	mov.l		%d0,%d1			# d1 = working copy to bump
4726 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4727 	mov.l		%d1,%a3			# Save incr value
4728 	mov.l		%d0,%a0			# <ea> = value before the increment
4729 	rts
4730 
4731 faddr_ind_p_a4:
4732 	mov.l		%a4,%d0			# Get current a4
4733 	mov.l		%d0,%d1			# d1 = working copy to bump
4734 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4735 	mov.l		%d1,%a4			# Save incr value
4736 	mov.l		%d0,%a0			# <ea> = value before the increment
4737 	rts
4738 
4739 faddr_ind_p_a5:
4740 	mov.l		%a5,%d0			# Get current a5
4741 	mov.l		%d0,%d1			# d1 = working copy to bump
4742 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4743 	mov.l		%d1,%a5			# Save incr value
4744 	mov.l		%d0,%a0			# <ea> = value before the increment
4745 	rts
4746 
4747 faddr_ind_p_a6:
4748 	mov.l		(%a6),%d0		# Get current a6
4749 	mov.l		%d0,%d1			# d1 = working copy to bump
4750 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4751 	mov.l		%d1,(%a6)		# Save incr value
4752 	mov.l		%d0,%a0			# <ea> = value before the increment
4753 	rts
4754 
4755 faddr_ind_p_a7:
4756 	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4757 
4758 	mov.l		EXC_A7(%a6),%d0		# Get current a7
4759 	mov.l		%d0,%d1			# d1 = working copy to bump
4760 	add.l		%a0,%d1			# Increment by # of bytes (in a0)
4761 	mov.l		%d1,EXC_A7(%a6)		# Save incr value
4762 	mov.l		%d0,%a0			# <ea> = value before the increment
4763 	rts
4764 
4765 ####################################################
4766 # Address register indirect w/ predecrement: -(An) #
4767 ####################################################
4768 faddr_ind_m_a0:
4769 	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4770 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4771 	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
4772 	mov.l		%d0,%a0			# <ea> = decremented value
4773 	rts
4774 
4775 faddr_ind_m_a1:
4776 	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4777 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4778 	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
4779 	mov.l		%d0,%a0			# <ea> = decremented value
4780 	rts
4781 
4782 faddr_ind_m_a2:
4783 	mov.l		%a2,%d0			# Get current a2
4784 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4785 	mov.l		%d0,%a2			# Save decr value
4786 	mov.l		%d0,%a0			# <ea> = decremented value
4787 	rts
4788 
4789 faddr_ind_m_a3:
4790 	mov.l		%a3,%d0			# Get current a3
4791 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4792 	mov.l		%d0,%a3			# Save decr value
4793 	mov.l		%d0,%a0			# <ea> = decremented value
4794 	rts
4795 
4796 faddr_ind_m_a4:
4797 	mov.l		%a4,%d0			# Get current a4
4798 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4799 	mov.l		%d0,%a4			# Save decr value
4800 	mov.l		%d0,%a0			# <ea> = decremented value
4801 	rts
4802 
4803 faddr_ind_m_a5:
4804 	mov.l		%a5,%d0			# Get current a5
4805 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4806 	mov.l		%d0,%a5			# Save decr value
4807 	mov.l		%d0,%a0			# <ea> = decremented value
4808 	rts
4809 
4810 faddr_ind_m_a6:
4811 	mov.l		(%a6),%d0		# Get current a6
4812 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4813 	mov.l		%d0,(%a6)		# Save decr value
4814 	mov.l		%d0,%a0			# <ea> = decremented value
4815 	rts
4816 
4817 faddr_ind_m_a7:
4818 	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4819 
4820 	mov.l		EXC_A7(%a6),%d0		# Get current a7
4821 	sub.l		%a0,%d0			# Decrement by # of bytes (in a0)
4822 	mov.l		%d0,EXC_A7(%a6)		# Save decr value
4823 	mov.l		%d0,%a0			# <ea> = decremented value
4824 	rts
4825 
4826 ########################################################
4827 # Address register indirect w/ displacement: (d16, An) #
4828 ########################################################
4829 faddr_ind_disp_a0:
4830 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4831 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4832 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4833 
4834 	tst.l		%d1			# did ifetch fail?
4835 	bne.l		iea_iacc		# yes
4836 
4837 	mov.w		%d0,%a0			# sign extend displacement
4838 
4839 	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
4840 	rts
4841 
4842 faddr_ind_disp_a1:
4843 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4844 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4845 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4846 
4847 	tst.l		%d1			# did ifetch fail?
4848 	bne.l		iea_iacc		# yes
4849 
4850 	mov.w		%d0,%a0			# sign extend displacement
4851 
4852 	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
4853 	rts
4854 
4855 faddr_ind_disp_a2:
4856 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4857 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4858 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4859 
4860 	tst.l		%d1			# did ifetch fail?
4861 	bne.l		iea_iacc		# yes
4862 
4863 	mov.w		%d0,%a0			# sign extend displacement
4864 
4865 	add.l		%a2,%a0			# a2 + d16
4866 	rts
4867 
4868 faddr_ind_disp_a3:
4869 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4870 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4871 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4872 
4873 	tst.l		%d1			# did ifetch fail?
4874 	bne.l		iea_iacc		# yes
4875 
4876 	mov.w		%d0,%a0			# sign extend displacement
4877 
4878 	add.l		%a3,%a0			# a3 + d16
4879 	rts
4880 
4881 faddr_ind_disp_a4:
4882 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4883 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4884 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4885 
4886 	tst.l		%d1			# did ifetch fail?
4887 	bne.l		iea_iacc		# yes
4888 
4889 	mov.w		%d0,%a0			# sign extend displacement
4890 
4891 	add.l		%a4,%a0			# a4 + d16
4892 	rts
4893 
4894 faddr_ind_disp_a5:
4895 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4896 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4897 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4898 
4899 	tst.l		%d1			# did ifetch fail?
4900 	bne.l		iea_iacc		# yes
4901 
4902 	mov.w		%d0,%a0			# sign extend displacement
4903 
4904 	add.l		%a5,%a0			# a5 + d16
4905 	rts
4906 
4907 faddr_ind_disp_a6:
4908 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4909 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4910 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4911 
4912 	tst.l		%d1			# did ifetch fail?
4913 	bne.l		iea_iacc		# yes
4914 
4915 	mov.w		%d0,%a0			# sign extend displacement
4916 
4917 	add.l		(%a6),%a0		# a6 + d16
4918 	rts
4919 
4920 faddr_ind_disp_a7:
4921 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4922 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4923 	bsr.l		_imem_read_word		# fetch d16 (used from d0 below)
4924 
4925 	tst.l		%d1			# did ifetch fail?
4926 	bne.l		iea_iacc		# yes
4927 
4928 	mov.w		%d0,%a0			# sign extend displacement
4929 
4930 	add.l		EXC_A7(%a6),%a0		# a7 + d16
4931 	rts
4932 
4933 ########################################################################
4934 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935 #    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4936 # Memory indirect postindexed: ([bd, An], Xn, od)		       #
4937 # Memory indirect preindexed: ([bd, An, Xn], od)		       #
4938 ########################################################################
4939 faddr_ind_ext:
4940 	addq.l		&0x8,%d1		# d1 = reg field + 8 to select An
4941 	bsr.l		fetch_dreg		# fetch base areg
4942 	mov.l		%d0,-(%sp)		# park base on stack across the ifetch
4943 
4944 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4945 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4946 	bsr.l		_imem_read_word		# fetch extword in d0
4947 
4948 	tst.l		%d1			# did ifetch fail?
4949 	bne.l		iea_iacc		# yes
4950 
4951 	mov.l		(%sp)+,%a0		# a0 = base areg value
4952 
4953 	btst		&0x8,%d0		# is disp only 8 bits? (bit 8 clear)
4954 	bne.w		fcalc_mem_ind		# no: bd / memory indirect forms
4955 
4956 	mov.l		%d0,L_SCR1(%a6)		# hold opword
4957 
4958 	mov.l		%d0,%d1			# make extword copy
4959 	rol.w		&0x4,%d1		# rotate reg num into place
4960 	andi.w		&0xf,%d1		# extract index regno
4961 
4962 # count on fetch_dreg() not to alter a0...
4963 	bsr.l		fetch_dreg		# fetch index
4964 
4965 	mov.l		%d2,-(%sp)		# save d2
4966 	mov.l		L_SCR1(%a6),%d2		# fetch opword
4967 
4968 	btst		&0xb,%d2		# is it word or long?
4969 	bne.b		faii8_long		# long
4970 	ext.l		%d0			# sign extend word index
4971 faii8_long:
4972 	mov.l		%d2,%d1
4973 	rol.w		&0x7,%d1		# rotate scale value into place
4974 	andi.l		&0x3,%d1		# extract scale value
4975 
4976 	lsl.l		%d1,%d0			# shift index by scale
4977 
4978 	extb.l		%d2			# sign extend displacement
4979 	add.l		%d2,%d0			# index + disp
4980 	add.l		%d0,%a0			# An + (index + disp)
4981 
4982 	mov.l		(%sp)+,%d2		# restore old d2
4983 	rts
4984 
4985 ###########################
4986 # Absolute short: (XXX).W #
4987 ###########################
4988 fabs_short:
4989 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4990 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4991 	bsr.l		_imem_read_word		# fetch short address
4992 
4993 	tst.l		%d1			# did ifetch fail?
4994 	bne.l		iea_iacc		# yes
4995 
4996 	mov.w		%d0,%a0			# return sign-extended <ea> in a0
4997 	rts
4998 
4999 ##########################
5000 # Absolute long: (XXX).L #
5001 ##########################
5002 fabs_long:
5003 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5004 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr past the longword
5005 	bsr.l		_imem_read_long		# fetch long address
5006 
5007 	tst.l		%d1			# did ifetch fail?
5008 	bne.l		iea_iacc		# yes
5009 
5010 	mov.l		%d0,%a0			# return <ea> in a0
5011 	rts
5012 
5013 #######################################################
5014 # Program counter indirect w/ displacement: (d16, PC) #
5015 #######################################################
5016 fpc_ind:
5017 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5018 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5019 	bsr.l		_imem_read_word		# fetch word displacement
5020 
5021 	tst.l		%d1			# did ifetch fail?
5022 	bne.l		iea_iacc		# yes
5023 
5024 	mov.w		%d0,%a0			# sign extend displacement
5025 
5026 	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
5027 
5028 # EXC_EXTWPTR was already advanced by 2 above. need to adjust here.
5029 	subq.l		&0x2,%a0		# adjust <ea>
5030 	rts
5031 
5032 ##########################################################
5033 # PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
5034 # "     "     w/   "  (base displacement): (bd, PC, Xn)  #
5035 # PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5036 # PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5037 ##########################################################
5038 fpc_ind_ext:
5039 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5040 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5041 	bsr.l		_imem_read_word		# fetch ext word
5042 
5043 	tst.l		%d1			# did ifetch fail?
5044 	bne.l		iea_iacc		# yes
5045 
5046 	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
5047 	subq.l		&0x2,%a0		# adjust base (undo the incr above)
5048 
5049 	btst		&0x8,%d0		# is disp only 8 bits?
5050 	bne.w		fcalc_mem_ind		# calc memory indirect
5051 
5052 	mov.l		%d0,L_SCR1(%a6)		# store opword
5053 
5054 	mov.l		%d0,%d1			# make extword copy
5055 	rol.w		&0x4,%d1		# rotate reg num into place
5056 	andi.w		&0xf,%d1		# extract register number
5057 
5058 # count on fetch_dreg() not to alter a0...
5059 	bsr.l		fetch_dreg		# fetch index
5060 
5061 	mov.l		%d2,-(%sp)		# save d2
5062 	mov.l		L_SCR1(%a6),%d2		# fetch opword
5063 
5064 	btst		&0xb,%d2		# is index word or long?
5065 	bne.b		fpii8_long		# long
5066 	ext.l		%d0			# sign extend word index
5067 fpii8_long:
5068 	mov.l		%d2,%d1
5069 	rol.w		&0x7,%d1		# rotate scale value into place
5070 	andi.l		&0x3,%d1		# extract scale value
5071 
5072 	lsl.l		%d1,%d0			# shift index by scale
5073 
5074 	extb.l		%d2			# sign extend displacement
5075 	add.l		%d2,%d0			# disp + index
5076 	add.l		%d0,%a0			# PC base + (index + disp)
5077 
5078 	mov.l		(%sp)+,%d2		# restore temp register
5079 	rts
5080 
5081 # d2 = index
5082 # d3 = base
5083 # d4 = od
5084 # d5 = extword
5085 fcalc_mem_ind:
5086 	btst		&0x6,%d0		# is the index suppressed?
5087 	beq.b		fcalc_index		# no: go fetch and scale it
5088 
5089 	movm.l		&0x3c00,-(%sp)		# save d2-d5
5090 
5091 	mov.l		%d0,%d5			# put extword in d5
5092 	mov.l		%a0,%d3			# put base in d3
5093 
5094 	clr.l		%d2			# yes, so index = 0
5095 	bra.b		fbase_supp_ck
5096 
5097 # index:
5098 fcalc_index:
5099 	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
5100 	bfextu		%d0{&16:&4},%d1		# fetch dreg index
5101 	bsr.l		fetch_dreg		# index value comes back in d0
5102 
5103 	movm.l		&0x3c00,-(%sp)		# save d2-d5
5104 	mov.l		%d0,%d2			# put index in d2
5105 	mov.l		L_SCR1(%a6),%d5		# put extword in d5
5106 	mov.l		%a0,%d3			# put base in d3
5107 
5108 	btst		&0xb,%d5		# is index word or long?
5109 	bne.b		fno_ext			# long
5110 	ext.l		%d2			# sign extend word index
5111 
5112 fno_ext:
5113 	bfextu		%d5{&21:&2},%d0		# extract scale value
5114 	lsl.l		%d0,%d2			# shift index by scale
5115 
5116 # base address (passed as parameter in d3):
5117 # we clear the value here if it should actually be suppressed.
5118 fbase_supp_ck:
5119 	btst		&0x7,%d5		# is the base suppressed?
5120 	beq.b		fno_base_sup		# no: keep base in d3
5121 	clr.l		%d3			# yes, so base = 0
5122 
5123 # base displacement:
5124 fno_base_sup:
5125 	bfextu		%d5{&26:&2},%d0		# get bd size
5126 #	beq.l		fmovm_error		# if (size == 0) it's reserved
5127 
5128 	cmpi.b		%d0,&0x2		# size: <2 no bd, 2 word bd, 3 long bd
5129 	blt.b		fno_bd
5130 	beq.b		fget_word_bd
5131 
5132 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5133 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5134 	bsr.l		_imem_read_long		# fetch long bd
5135 
5136 	tst.l		%d1			# did ifetch fail?
5137 	bne.l		fcea_iacc		# yes
5138 
5139 	bra.b		fchk_ind
5140 
5141 fget_word_bd:
5142 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5143 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5144 	bsr.l		_imem_read_word		# fetch word bd
5145 
5146 	tst.l		%d1			# did ifetch fail?
5147 	bne.l		fcea_iacc		# yes
5148 
5149 	ext.l		%d0			# sign extend bd
5150 
5151 fchk_ind:
5152 	add.l		%d0,%d3			# base += bd
5153 
5154 # outer displacement:
5155 fno_bd:
5156 	bfextu		%d5{&30:&2},%d0		# is od suppressed?
5157 	beq.w		faii_bd			# field is 0: no memory indirection
5158 
5159 	cmpi.b		%d0,&0x2		# field: 1 null od, 2 word od, 3 long od
5160 	blt.b		fnull_od
5161 	beq.b		fword_od
5162 
5163 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5164 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5165 	bsr.l		_imem_read_long		# fetch long od
5166 
5167 	tst.l		%d1			# did ifetch fail?
5168 	bne.l		fcea_iacc		# yes
5169 
5170 	bra.b		fadd_them
5171 
5172 fword_od:
5173 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5174 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5175 	bsr.l		_imem_read_word		# fetch word od
5176 
5177 	tst.l		%d1			# did ifetch fail?
5178 	bne.l		fcea_iacc		# yes
5179 
5180 	ext.l		%d0			# sign extend od
5181 	bra.b		fadd_them
5182 
5183 fnull_od:
5184 	clr.l		%d0			# od = 0
5185 
5186 fadd_them:
5187 	mov.l		%d0,%d4			# put od in d4
5188 
5189 	btst		&0x2,%d5		# pre or post indexing?
5190 	beq.b		fpre_indexed
5191 
5192 	mov.l		%d3,%a0			# postindexed: read pointer at base + bd
5193 	bsr.l		_dmem_read_long
5194 
5195 	tst.l		%d1			# did dfetch fail?
5196 	bne.w		fcea_err		# yes
5197 
5198 	add.l		%d2,%d0			# <ea> += index
5199 	add.l		%d4,%d0			# <ea> += od
5200 	bra.b		fdone_ea
5201 
5202 fpre_indexed:
5203 	add.l		%d2,%d3			# preindexing
5204 	mov.l		%d3,%a0			# read pointer at base + bd + index
5205 	bsr.l		_dmem_read_long
5206 
5207 	tst.l		%d1			# did dfetch fail?
5208 	bne.w		fcea_err		# yes
5209 
5210 	add.l		%d4,%d0			# ea += od
5211 	bra.b		fdone_ea
5212 
5213 faii_bd:
5214 	add.l		%d2,%d3			# ea = (base + bd) + index
5215 	mov.l		%d3,%d0
5216 fdone_ea:
5217 	mov.l		%d0,%a0			# return <ea> in a0
5218 
5219 	movm.l		(%sp)+,&0x003c		# restore d2-d5
5220 	rts
5221 
5222 #########################################################
5223 fcea_err:
5224 	mov.l		%d3,%a0			# a0 = address that was being read
5225 
5226 	movm.l		(%sp)+,&0x003c		# restore d2-d5
5227 	mov.w		&0x0101,%d0		# d0 = code passed to iea_dacc (presumably FSLW)
5228 	bra.l		iea_dacc		# exit via data access error handler
5229 
5230 fcea_iacc:
5231 	movm.l		(%sp)+,&0x003c		# restore d2-d5
5232 	bra.l		iea_iacc		# exit via instr access error handler
5233 
5234 fmovm_out_err:
5235 	bsr.l		restore			# see XREF note: restore An index regs
5236 	mov.w		&0x00e1,%d0		# d0 = code for failed store (presumably FSLW)
5237 	bra.b		fmovm_err
5238 
5239 fmovm_in_err:
5240 	bsr.l		restore			# see XREF note: restore An index regs
5241 	mov.w		&0x0161,%d0		# d0 = code for failed fetch (presumably FSLW)
5242 
5243 fmovm_err:
5244 	mov.l		L_SCR1(%a6),%a0		# a0 = <ea> saved by fmovm_dynamic
5245 	bra.l		iea_dacc		# exit via data access error handler
5246 
5247 #########################################################################
5248 # XDEF ****************************************************************	#
5249 #	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
5250 #									#
5251 # XREF ****************************************************************	#
5252 #	_imem_read_long() - read longword from memory			#
5253 #	iea_iacc() - _imem_read_long() failed; error recovery		#
5254 #									#
5255 # INPUT ***************************************************************	#
5256 #	None								#
5257 #									#
5258 # OUTPUT **************************************************************	#
5259 #	If _imem_read_long() doesn't fail:				#
5260 #		USER_FPCR(a6)  = new FPCR value				#
5261 #		USER_FPSR(a6)  = new FPSR value				#
5262 #		USER_FPIAR(a6) = new FPIAR value			#
5263 #									#
5264 # ALGORITHM ***********************************************************	#
5265 #	Decode the instruction type by looking at the extension word	#
5266 # in order to see how many control registers to fetch from memory.	#
5267 # Fetch them using _imem_read_long(). If this fetch fails, exit through	#
5268 # the special access error exit handler iea_iacc().			#
5269 #									#
5270 # Instruction word decoding:						#
5271 #									#
5272 #	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
5273 #									#
5274 #		WORD1			WORD2				#
5275 #	1111 0010 00 111100	100$ $$00 0000 0000			#
5276 #									#
5277 #	$$$ (100): FPCR							#
5278 #	    (010): FPSR							#
5279 #	    (001): FPIAR						#
5280 #	    (000): FPIAR						#
5281 #									#
5282 #########################################################################
5283 
5284 	global		fmovm_ctrl
5285 fmovm_ctrl:
5286 	mov.b		EXC_EXTWORD(%a6),%d0	# d0.b = first byte of ext word (reg select bits)
5287 	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
5288 	beq.w		fctrl_in_7		# yes
5289 	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
5290 	beq.w		fctrl_in_6		# yes
5291 	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
5292 	beq.b		fctrl_in_5		# yes
5293 
5294 # fmovem.l #<data>, fpsr/fpiar (fall-through: none of the three patterns above matched)
5295 fctrl_in_3:
5296 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = current instruction stream addr
5297 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5298 	bsr.l		_imem_read_long		# fetch FPSR from mem (d0 = data, d1 = status)
5299 
5300 	tst.l		%d1			# did ifetch fail?
5301 	bne.l		iea_iacc		# yes; leave through access error exit
5302 
5303 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in USER_FPSR(a6)
5304 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = addr of next immediate longword
5305 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5306 	bsr.l		_imem_read_long		# fetch FPIAR from mem
5307 
5308 	tst.l		%d1			# did ifetch fail?
5309 	bne.l		iea_iacc		# yes; leave through access error exit
5310 
5311 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in USER_FPIAR(a6)
5312 	rts
5313 
5314 # fmovem.l #<data>, fpcr/fpiar
5315 fctrl_in_5:
5316 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = current instruction stream addr
5317 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5318 	bsr.l		_imem_read_long		# fetch FPCR from mem (d0 = data, d1 = status)
5319 
5320 	tst.l		%d1			# did ifetch fail?
5321 	bne.l		iea_iacc		# yes; leave through access error exit
5322 
5323 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in USER_FPCR(a6)
5324 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = addr of next immediate longword
5325 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5326 	bsr.l		_imem_read_long		# fetch FPIAR from mem
5327 
5328 	tst.l		%d1			# did ifetch fail?
5329 	bne.l		iea_iacc		# yes; leave through access error exit
5330 
5331 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in USER_FPIAR(a6)
5332 	rts
5333 
5334 # fmovem.l #<data>, fpcr/fpsr
5335 fctrl_in_6:
5336 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = current instruction stream addr
5337 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5338 	bsr.l		_imem_read_long		# fetch FPCR from mem (d0 = data, d1 = status)
5339 
5340 	tst.l		%d1			# did ifetch fail?
5341 	bne.l		iea_iacc		# yes; leave through access error exit
5342 
5343 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in USER_FPCR(a6)
5344 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = addr of next immediate longword
5345 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5346 	bsr.l		_imem_read_long		# fetch FPSR from mem
5347 
5348 	tst.l		%d1			# did ifetch fail?
5349 	bne.l		iea_iacc		# yes; leave through access error exit
5350 
5351 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in USER_FPSR(a6)
5352 	rts
5353 
5354 # fmovem.l #<data>, fpcr/fpsr/fpiar
5355 fctrl_in_7:
5356 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = current instruction stream addr
5357 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5358 	bsr.l		_imem_read_long		# fetch FPCR from mem (d0 = data, d1 = status)
5359 
5360 	tst.l		%d1			# did ifetch fail?
5361 	bne.l		iea_iacc		# yes; leave through access error exit
5362 
5363 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in USER_FPCR(a6)
5364 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = addr of next immediate longword
5365 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5366 	bsr.l		_imem_read_long		# fetch FPSR from mem
5367 
5368 	tst.l		%d1			# did ifetch fail?
5369 	bne.l		iea_iacc		# yes; leave through access error exit
5370 
5371 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in USER_FPSR(a6)
5372 	mov.l		EXC_EXTWPTR(%a6),%a0	# a0 = addr of next immediate longword
5373 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# advance saved ptr past this longword
5374 	bsr.l		_imem_read_long		# fetch FPIAR from mem
5375 
5376 	tst.l		%d1			# did ifetch fail?
5377 	bne.l		iea_iacc		# yes; leave through access error exit
5378 
5379 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in USER_FPIAR(a6)
5380 	rts
5381 
5382 ##########################################################################
5383 
5384 #########################################################################
5385 # XDEF ****************************************************************	#
5386 #	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
5387 #			  OVFL/UNFL exceptions will result		#
5388 #									#
5389 # XREF ****************************************************************	#
5390 #	norm() - normalize mantissa after adjusting exponent		#
5391 #									#
5392 # INPUT ***************************************************************	#
5393 #	a0 = pointer to fp op1(src)					#
5394 #	a1 = pointer to fp op2(dst)					#
5395 #									#
5396 # OUTPUT **************************************************************	#
5397 #	FP_SCR0(a6) = fp op1 scaled(src)				#
5398 #	FP_SCR1(a6) = fp op2 scaled(dst)				#
5399 #	d0         = scale amount					#
5400 #									#
5401 # ALGORITHM ***********************************************************	#
5402 #	If the DST exponent is > the SRC exponent, set the DST exponent	#
5403 # equal to 0x3fff and scale the SRC exponent by the value that the	#
5404 # DST exponent was scaled by. If the SRC exponent is greater or equal,	#
5405 # do the opposite. Return this scale factor in d0.			#
5406 #	If the two exponents differ by > the number of mantissa bits	#
5407 # plus two, then set the smallest exponent to a very small value as a	#
5408 # quick shortcut.							#
5409 #									#
5410 #########################################################################
5411 
5412 	global		addsub_scaler2
5413 addsub_scaler2:
5414 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# FP_SCR0 = working copy of src (via a0)
5415 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# FP_SCR1 = working copy of dst (via a1)
5416 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
5417 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
5418 	mov.w		SRC_EX(%a0),%d0		# d0 = src {sgn,exp}
5419 	mov.w		DST_EX(%a1),%d1		# d1 = dst {sgn,exp}
5420 	mov.w		%d0,FP_SCR0_EX(%a6)
5421 	mov.w		%d1,FP_SCR1_EX(%a6)
5422 
5423 	andi.w		&0x7fff,%d0		# strip sign; d0 = src exponent
5424 	andi.w		&0x7fff,%d1		# strip sign; d1 = dst exponent
5425 	mov.w		%d0,L_SCR1(%a6)		# store src exponent
5426 	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
5427 
5428 	cmp.w		%d0, %d1		# is src exp >= dst exp?
5429 	bge.l		src_exp_ge2		# yes; src is the operand scaled to 0x3fff
5430 
5431 # dst exp is >  src exp; scale dst to exp = 0x3fff
5432 dst_exp_gt2:
5433 	bsr.l		scale_to_zero_dst	# FP_SCR1 exp = 0x3fff; d0 = scale factor
5434 	mov.l		%d0,-(%sp)		# save scale factor
5435 
5436 	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
5437 	bne.b		cmpexp12		# no; stored src exponent is usable as is
5438 
5439 	lea		FP_SCR0(%a6),%a0	# pass ptr to src copy
5440 	bsr.l		norm			# normalize the denorm; result is new exp
5441 	neg.w		%d0			# new exp = -(shft val)
5442 	mov.w		%d0,L_SCR1(%a6)		# insert new src exp
5443 
5444 cmpexp12:
5445 	mov.w		2+L_SCR1(%a6),%d0	# d0 = dst (larger) exponent
5446 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
5447 
5448 	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
5449 	bge.b		quick_scale12		# yes; take the shortcut
5450 
5451 	mov.w		L_SCR1(%a6),%d0		# d0 = src exponent
5452 	add.w		0x2(%sp),%d0		# scale src exponent by scale factor (low word of saved long)
5453 	mov.w		FP_SCR0_EX(%a6),%d1
5454 	and.w		&0x8000,%d1		# keep only the src sign
5455 	or.w		%d1,%d0			# concat {sgn,new exp}
5456 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
5457 
5458 	mov.l		(%sp)+,%d0		# return SCALE factor
5459 	rts
5460 
5461 quick_scale12:
5462 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
5463 	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
5464 
5465 	mov.l		(%sp)+,%d0		# return SCALE factor
5466 	rts
5467 
5468 # src exp is >= dst exp; scale src to exp = 0x3fff
5469 src_exp_ge2:
5470 	bsr.l		scale_to_zero_src	# FP_SCR0 exp = 0x3fff; d0 = scale factor
5471 	mov.l		%d0,-(%sp)		# save scale factor
5472 
5473 	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
5474 	bne.b		cmpexp22		# no; stored dst exponent is usable as is
5475 	lea		FP_SCR1(%a6),%a0	# pass ptr to dst copy
5476 	bsr.l		norm			# normalize the denorm; result is new exp
5477 	neg.w		%d0			# new exp = -(shft val)
5478 	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp
5479 
5480 cmpexp22:
5481 	mov.w		L_SCR1(%a6),%d0		# d0 = src (larger or equal) exponent
5482 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
5483 
5484 	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
5485 	bge.b		quick_scale22		# yes; take the shortcut
5486 
5487 	mov.w		2+L_SCR1(%a6),%d0	# d0 = dst exponent
5488 	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor (low word of saved long)
5489 	mov.w		FP_SCR1_EX(%a6),%d1
5490 	andi.w		&0x8000,%d1		# keep only the dst sign
5491 	or.w		%d1,%d0			# concat {sgn,new exp}
5492 	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
5493 
5494 	mov.l		(%sp)+,%d0		# return SCALE factor
5495 	rts
5496 
5497 quick_scale22:
5498 	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
5499 	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
5500 
5501 	mov.l		(%sp)+,%d0		# return SCALE factor
5502 	rts
5503 
5504 ##########################################################################
5505 
5506 #########################################################################
5507 # XDEF ****************************************************************	#
5508 #	scale_to_zero_src(): scale the exponent of extended precision	#
5509 #			     value at FP_SCR0(a6).			#
5510 #									#
5511 # XREF ****************************************************************	#
5512 #	norm() - normalize the mantissa if the operand was a DENORM	#
5513 #									#
5514 # INPUT ***************************************************************	#
5515 #	FP_SCR0(a6) = extended precision operand to be scaled		#
5516 #									#
5517 # OUTPUT **************************************************************	#
5518 #	FP_SCR0(a6) = scaled extended precision operand			#
5519 #	d0	    = scale value					#
5520 #									#
5521 # ALGORITHM ***********************************************************	#
5522 #	Set the exponent of the input operand to 0x3fff. Save the value	#
5523 # of the difference between the original and new exponent. Then,	#
5524 # normalize the operand if it was a DENORM. Add this normalization	#
5525 # value to the previous value. Return the result.			#
5526 #									#
5527 #########################################################################
5528 
5529 	global		scale_to_zero_src
5530 scale_to_zero_src:
5531 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
5532 	mov.w		%d1,%d0			# make a copy
5533 
5534 	andi.l		&0x7fff,%d1		# d1 = operand's exponent (upper word cleared)
5535 
5536 	andi.w		&0x8000,%d0		# extract operand's sgn
5537 	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
5538 
5539 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
5540 
5541 	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
5542 	beq.b		stzs_denorm		# yes; normalize the DENORM
5543 
5544 stzs_norm:
5545 	mov.l		&0x3fff,%d0
5546 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5547 
5548 	rts
5549 
5550 stzs_denorm:
5551 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
5552 	bsr.l		norm			# normalize denorm; d0 = shift amount
5553 	neg.l		%d0			# new exponent = -(shft val)
5554 	mov.l		%d0,%d1			# d1 = exponent used by stzs_norm
5555 	bra.b		stzs_norm		# finish scaling
5556 
5557 ###
5558 
5559 #########################################################################
5560 # XDEF ****************************************************************	#
5561 #	scale_sqrt(): scale the input operand exponent so a subsequent	#
5562 #		      fsqrt operation won't take an exception.		#
5563 #									#
5564 # XREF ****************************************************************	#
5565 #	norm() - normalize the mantissa if the operand was a DENORM	#
5566 #									#
5567 # INPUT ***************************************************************	#
5568 #	FP_SCR0(a6) = extended precision operand to be scaled		#
5569 #									#
5570 # OUTPUT **************************************************************	#
5571 #	FP_SCR0(a6) = scaled extended precision operand			#
5572 #	d0	    = scale value					#
5573 #									#
5574 # ALGORITHM ***********************************************************	#
5575 #	If the input operand is a DENORM, normalize it.			#
5576 #	If the exponent of the input operand is even, set the exponent	#
5577 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the	#
5578 # exponent of the input operand is odd, set the exponent to 0x3fff and	#
5579 # return a scale factor of "(exp-0x3fff)/2".				#
5580 #									#
5581 #########################################################################
5582 
5583 	global		scale_sqrt
5584 scale_sqrt:
5585 	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
5586 	beq.b		ss_denorm		# yes; normalize the DENORM
5587 
5588 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
5589 	andi.l		&0x7fff,%d1		# d1 = operand's exponent (upper word cleared)
5590 
5591 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only the sgn in the operand
5592 
5593 	btst		&0x0,%d1		# is exp even or odd?
5594 	beq.b		ss_norm_even		# bit 0 clear; exp is even
5595 
5596 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: new exponent = 0x3fff
5597 
5598 	mov.l		&0x3fff,%d0
5599 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5600 	asr.l		&0x1,%d0		# divide scale factor by 2
5601 	rts
5602 
5603 ss_norm_even:
5604 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# even: new exponent = 0x3ffe
5605 
5606 	mov.l		&0x3ffe,%d0
5607 	sub.l		%d1,%d0			# scale = 0x3ffe + (-exp)
5608 	asr.l		&0x1,%d0		# divide scale factor by 2
5609 	rts
5610 
5611 ss_denorm:
5612 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
5613 	bsr.l		norm			# normalize denorm; d0 = shift amount
5614 
5615 	btst		&0x0,%d0		# is shift amount even or odd?
5616 	beq.b		ss_denorm_even		# bit 0 clear; even
5617 
5618 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: OR in 0x3fff (NOTE(review): assumes exp field is 0 here)
5619 
5620 	add.l		&0x3fff,%d0		# scale = shift amount + 0x3fff
5621 	asr.l		&0x1,%d0		# divide scale factor by 2
5622 	rts
5623 
5624 ss_denorm_even:
5625 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# even: OR in 0x3ffe (NOTE(review): assumes exp field is 0 here)
5626 
5627 	add.l		&0x3ffe,%d0		# scale = shift amount + 0x3ffe
5628 	asr.l		&0x1,%d0		# divide scale factor by 2
5629 	rts
5630 
5631 ###
5632 
5633 #########################################################################
5634 # XDEF ****************************************************************	#
5635 #	scale_to_zero_dst(): scale the exponent of extended precision	#
5636 #			     value at FP_SCR1(a6).			#
5637 #									#
5638 # XREF ****************************************************************	#
5639 #	norm() - normalize the mantissa if the operand was a DENORM	#
5640 #									#
5641 # INPUT ***************************************************************	#
5642 #	FP_SCR1(a6) = extended precision operand to be scaled		#
5643 #									#
5644 # OUTPUT **************************************************************	#
5645 #	FP_SCR1(a6) = scaled extended precision operand			#
5646 #	d0	    = scale value					#
5647 #									#
5648 # ALGORITHM ***********************************************************	#
5649 #	Set the exponent of the input operand to 0x3fff. Save the value	#
5650 # of the difference between the original and new exponent. Then,	#
5651 # normalize the operand if it was a DENORM. Add this normalization	#
5652 # value to the previous value. Return the result.			#
5653 #									#
5654 #########################################################################
5655 
5656 	global		scale_to_zero_dst
5657 scale_to_zero_dst:
5658 	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
5659 	mov.w		%d1,%d0			# make a copy
5660 
5661 	andi.l		&0x7fff,%d1		# d1 = operand's exponent (upper word cleared)
5662 
5663 	andi.w		&0x8000,%d0		# extract operand's sgn
5664 	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
5665 
5666 	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
5667 
5668 	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
5669 	beq.b		stzd_denorm		# yes; normalize the DENORM
5670 
5671 stzd_norm:
5672 	mov.l		&0x3fff,%d0
5673 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5674 	rts
5675 
5676 stzd_denorm:
5677 	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
5678 	bsr.l		norm			# normalize denorm; d0 = shift amount
5679 	neg.l		%d0			# new exponent = -(shft val)
5680 	mov.l		%d0,%d1			# d1 = exponent used by stzd_norm
5681 	bra.b		stzd_norm		# finish scaling
5682 
5683 ##########################################################################
5684 
5685 #########################################################################
5686 # XDEF ****************************************************************	#
5687 #	res_qnan(): return default result w/ QNAN operand for dyadic	#
5688 #	res_snan(): return default result w/ SNAN operand for dyadic	#
5689 #	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
5690 #	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
5691 #									#
5692 # XREF ****************************************************************	#
5693 #	None								#
5694 #									#
5695 # INPUT ***************************************************************	#
5696 #	FP_SRC(a6) = pointer to extended precision src operand		#
5697 #	FP_DST(a6) = pointer to extended precision dst operand		#
5698 #									#
5699 # OUTPUT **************************************************************	#
5700 #	fp0 = default result						#
5701 #									#
5702 # ALGORITHM ***********************************************************	#
5703 #	If either operand (but not both operands) of an operation is a	#
5704 # nonsignalling NAN, then that NAN is returned as the result. If both	#
5705 # operands are nonsignalling NANs, then the destination operand		#
5706 # nonsignalling NAN is returned as the result.				#
5707 #	If either operand to an operation is a signalling NAN (SNAN),	#
5708 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
5709 # enable bit is set in the FPCR, then the trap is taken and the		#
5710 # destination is not modified. If the SNAN trap enable bit is not set,	#
5711 # then the SNAN is converted to a nonsignalling NAN (by setting the	#
5712 # SNAN bit in the operand to one), and the operation continues as	#
5713 # described in the preceding paragraph, for nonsignalling NANs.		#
5714 #	Make sure the appropriate FPSR bits are set before exiting.	#
5715 #									#
5716 #########################################################################
5717 
5718 	global		res_qnan
5719 	global		res_snan
5720 res_qnan:
5721 res_snan:
5722 	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
5723 	beq.b		dst_snan2		# yes; dst SNAN takes priority
5724 	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
5725 	beq.b		dst_qnan2		# yes; return dst, still check src for SNAN
5726 src_nan:
5727 	cmp.b		STAG(%a6), &QNAN	# dst is not a NAN; is the src a QNAN?
5728 	beq.b		src_qnan2		# yes; else fall through as src SNAN
5729 	global		res_snan_1op
5730 res_snan_1op:
5731 src_snan2:
5732 	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
5733 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # flag NAN, AIOP and SNAN
5734 	lea		FP_SRC(%a6), %a0	# result = (quieted) src
5735 	bra.b		nan_comp
5736 	global		res_qnan_1op
5737 res_qnan_1op:
5738 src_qnan2:
5739 	or.l		&nan_mask, USER_FPSR(%a6) # flag NAN only
5740 	lea		FP_SRC(%a6), %a0	# result = src
5741 	bra.b		nan_comp
5742 dst_snan2:
5743 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # flag NAN, AIOP and SNAN
5744 	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
5745 	lea		FP_DST(%a6), %a0	# result = (quieted) dst
5746 	bra.b		nan_comp
5747 dst_qnan2:
5748 	lea		FP_DST(%a6), %a0	# result = dst
5749 	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
5750 	bne		nan_done		# no; only the NAN flag is needed
5751 	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6) # yes; also flag AIOP and SNAN
5752 nan_done:
5753 	or.l		&nan_mask, USER_FPSR(%a6) # flag NAN
5754 nan_comp:
5755 	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
5756 	beq.b		nan_not_neg		# no
5757 	or.l		&neg_mask, USER_FPSR(%a6) # yes; flag negative result
5758 nan_not_neg:
5759 	fmovm.x		(%a0), &0x80		# load the chosen NAN as the result (fp0 per header)
5760 	rts
5761 
5762 #########################################################################
5763 # XDEF ****************************************************************	#
5764 #	res_operr(): return default result during operand error		#
5765 #									#
5766 # XREF ****************************************************************	#
5767 #	None								#
5768 #									#
5769 # INPUT ***************************************************************	#
5770 #	None								#
5771 #									#
5772 # OUTPUT **************************************************************	#
5773 #	fp0 = default operand error result				#
5774 #									#
5775 # ALGORITHM ***********************************************************	#
5776 #	A nonsignalling NAN is returned as the default result when	#
5777 # an operand error occurs for the following cases:			#
5778 #									#
5779 #	Multiply: (Infinity x Zero)					#
5780 #	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
5781 #									#
5782 #########################################################################
5783 
5784 	global		res_operr
5785 res_operr:
5786 	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # flag NAN, OPERR and AIOP
5787 	fmovm.x		nan_return(%pc), &0x80	# load default NAN below (fp0 per header)
5788 	rts
5789 
5790 nan_return:
5791 	long		0x7fff0000, 0xffffffff, 0xffffffff
5792 
5793 #########################################################################
5794 # XDEF ****************************************************************	#
5795 #	_denorm(): denormalize an intermediate result			#
5796 #									#
5797 # XREF ****************************************************************	#
5798 #	None								#
5799 #									#
5800 # INPUT *************************************************************** #
5801 #	a0 = points to the operand to be denormalized			#
5802 #		(in the internal extended format)			#
5803 #									#
5804 #	d0 = rounding precision						#
5805 #									#
5806 # OUTPUT **************************************************************	#
5807 #	a0 = pointer to the denormalized result				#
5808 #		(in the internal extended format)			#
5809 #									#
5810 #	d0 = guard,round,sticky						#
5811 #									#
5812 # ALGORITHM ***********************************************************	#
5813 #	According to the exponent underflow threshold for the given	#
5814 # precision, shift the mantissa bits to the right in order to raise the	#
5815 # exponent of the operand to the threshold value. While shifting the	#
5816 # mantissa bits right, maintain the value of the guard, round, and	#
5817 # sticky bits.								#
5818 # other notes:								#
5819 #	(1) _denorm() is called by the underflow routines		#
5820 #	(2) _denorm() does NOT affect the status register		#
5821 #									#
5822 #########################################################################
5823 
5824 #
5825 # table of exponent threshold values for each precision
5826 #
5827 tbl_thresh:
5828 	short		0x0
5829 	short		sgl_thresh
5830 	short		dbl_thresh
5831 
5832 	global		_denorm
5833 _denorm:
5834 #
5835 # Load the exponent threshold for the precision selected and check
5836 # to see if (threshold - exponent) is > 65 in which case we can
5837 # simply set the sticky bit and zero the mantissa. otherwise
5838 # we have to call the denormalization routine.
5839 #
5840 	lsr.b		&0x2, %d0		# shift prec to lo bits
5841 	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5842 	mov.w		%d1, %d0		# copy d1 into d0
5843 	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
5844 	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
5845 	bpl.b		denorm_set_stky		# yes; just set sticky
5846 
5847 	clr.l		%d0			# clear g,r,s
5848 	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
5849 	beq.b		denorm_call		# no; don't change anything
5850 	bset		&29, %d0		# yes; set sticky bit
5851 
5852 denorm_call:
5853 	bsr.l		dnrm_lp			# denormalize the number
5854 	rts
5855 
5856 #
5857 # all bits would have been shifted off during the denorm so simply
5858 # set the sticky bit and clear the entire mantissa.
5859 #
5860 denorm_set_stky:
5861 	mov.l		&0x20000000, %d0	# set sticky bit in return value
5862 	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
5863 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
5864 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
5865 	rts
5866 
5867 #									#
5868 # dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
5869 #									#
5870 # INPUT:								#
5871 #	%a0	   : points to the operand to be denormalized		#
5872 #	%d0{31:29} : initial guard,round,sticky				#
5873 #	%d1{15:0}  : denormalization threshold				#
5874 # OUTPUT:								#
5875 #	%a0	   : points to the denormalized operand			#
5876 #	%d0{31:29} : final guard,round,sticky				#
5877 #									#
5878 
5879 # *** Local Equates *** #
5880 set	GRS,		L_SCR2			# g,r,s temp storage
5881 set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
5882 
5883 	global		dnrm_lp
5884 dnrm_lp:
5885 
5886 #
5887 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888 # in memory so as to make the bitfield extraction for denormalization easier.
5889 # (assumes L_SCR2 is the longword immediately after L_SCR1 - TODO confirm)
5890 	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891 	mov.l		%d0, GRS(%a6)		# place g,r,s after it
5892 
5893 #
5894 # check to see how much less than the underflow threshold the operand
5895 # exponent is.
5896 #
5897 	mov.l		%d1, %d0		# copy the denorm threshold
5898 	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
5899 	ble.b		dnrm_no_lp		# d1 <= 0; nothing to shift
5900 	cmpi.w		%d1, &0x20		# is ( 0 < d1 < 32) ?
5901 	blt.b		case_1			# yes
5902 	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
5903 	blt.b		case_2			# yes
5904 	bra.w		case_3			# (d1 >= 64)
5905 
5906 #
5907 # No denormalization necessary
5908 #
5909 dnrm_no_lp:
5910 	mov.l		GRS(%a6), %d0		# restore original g,r,s
5911 	rts
5912 
5913 #
5914 # case (0<d1<32)
5915 #
5916 # %d0 = denorm threshold
5917 # %d1 = "n" = amt to shift
5918 #
5919 #	---------------------------------------------------------
5920 #	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5921 #	---------------------------------------------------------
5922 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5923 #	\	   \		      \			 \
5924 #	 \	    \		       \		  \
5925 #	  \	     \			\		   \
5926 #	   \	      \			 \		    \
5927 #	    \	       \		  \		     \
5928 #	     \		\		   \		      \
5929 #	      \		 \		    \		       \
5930 #	       \	  \		     \			\
5931 #	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932 #	---------------------------------------------------------
5933 #	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
5934 #	---------------------------------------------------------
5935 #
5936 case_1:
5937 	mov.l		%d2, -(%sp)		# create temp storage
5938 
5939 	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5940 	mov.l		&32, %d0
5941 	sub.w		%d1, %d0		# %d0 = 32 - %d1
5942 
5943 	cmpi.w		%d1, &29		# is shft amt >= 29
5944 	blt.b		case1_extract		# no; no fix needed
5945 	mov.b		GRS(%a6), %d2		# yes; fetch byte holding old g,r,s
5946 	or.b		%d2, 3+FTEMP_LO2(%a6)	# fold it into low byte of the LO copy
5947 
5948 case1_extract:
5949 	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950 	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951 	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5952 
5953 	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
5954 	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
5955 
5956 	bftst		%d0{&2:&30}		# any bits set below the new G,R?
5957 	beq.b		case1_sticky_clear	# no; go finish
5958 	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
5959 
5960 case1_sticky_clear:
5961 	and.l		&0xe0000000, %d0	# clear all but G,R,S
5962 	mov.l		(%sp)+, %d2		# restore temp register
5963 	rts
5964 
5965 #
5966 # case (32<=d1<64)
5967 #
5968 # %d0 = denorm threshold
5969 # %d1 = "n" = amt to shift
5970 #
5971 #	---------------------------------------------------------
5972 #	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5973 #	---------------------------------------------------------
5974 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5975 #	\	   \		      \
5976 #	 \	    \		       \
5977 #	  \	     \			-------------------
5978 #	   \	      --------------------		   \
5979 #	    -------------------		  \		    \
5980 #			       \	   \		     \
5981 #				\	    \		      \
5982 #				 \	     \		       \
5983 #	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984 #	---------------------------------------------------------
5985 #	|0...............0|0....0| NEW_LO     |grs		|
5986 #	---------------------------------------------------------
5987 #
5988 case_2:
5989 	mov.l		%d2, -(%sp)		# create temp storage
5990 
5991 	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5992 	subi.w		&0x20, %d1		# %d1 now between 0 and 31
5993 	mov.l		&0x20, %d0
5994 	sub.w		%d1, %d0		# %d0 = 32 - %d1
5995 
5996 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
5997 # the number of bits to check for the sticky detect.
5998 # it only plays a role in shift amounts of 61-63.
5999 	mov.b		GRS(%a6), %d2		# fetch byte holding old g,r,s
6000 	or.b		%d2, 3+FTEMP_LO2(%a6)	# fold it into low byte of the LO copy
6001 
6002 	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003 	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6004 
6005 	bftst		%d1{&2:&30}		# any bits set below the new G,R?
6006 	bne.b		case2_set_sticky	# yes; set sticky bit
6007 	bftst		FTEMP_LO2(%a6){%d0:&31}	# any bits set in the rest of the LO copy?
6008 	bne.b		case2_set_sticky	# yes; set sticky bit
6009 
6010 	mov.l		%d1, %d0		# move new G,R,S to %d0
6011 	bra.b		case2_end
6012 
6013 case2_set_sticky:
6014 	mov.l		%d1, %d0		# move new G,R,S to %d0
6015 	bset		&rnd_stky_bit, %d0	# set sticky bit
6016 
6017 case2_end:
6018 	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
6019 	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
6020 	and.l		&0xe0000000, %d0	# clear all but G,R,S
6021 
6022 	mov.l		(%sp)+,%d2		# restore temp register
6023 	rts
6024 
6025 #
6026 # case (d1>=64)
6027 #
6028 # %d0 = denorm threshold
6029 # %d1 = amt to shift
6030 #
6031 case_3:
6032 	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
6033 
6034 	cmpi.w		%d1, &65		# is shift amt > 65?
6035 	blt.b		case3_64		# no; it's == 64 (d1 >= 64 on entry)
6036 	beq.b		case3_65		# no; it's == 65
6037 
6038 #
6039 # case (d1>65)
6040 #
6041 # Shift value is > 65 and out of range. All bits are shifted off.
6042 # Return a zero mantissa with the sticky bit set
6043 #
6044 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6045 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6046 	mov.l		&0x20000000, %d0	# set sticky bit
6047 	rts
6048 
6049 #
6050 # case (d1 == 64)
6051 #
6052 #	---------------------------------------------------------
6053 #	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6054 #	---------------------------------------------------------
6055 #	<-------(32)------>
6056 #	\		   \
6057 #	 \		    \
6058 #	  \		     \
6059 #	   \		      ------------------------------
6060 #	    -------------------------------		    \
6061 #					   \		     \
6062 #					    \		      \
6063 #					     \		       \
6064 #					      <-------(32)------>
6065 #	---------------------------------------------------------
6066 #	|0...............0|0................0|grs		|
6067 #	---------------------------------------------------------
6068 #
6069 case3_64:
6070 	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
6071 	mov.l		%d0, %d1		# make a copy
6072 	and.l		&0xc0000000, %d0	# extract G,R (top two bits of hi)
6073 	and.l		&0x3fffffff, %d1	# extract other bits; sets Z for case3_complete
6074 
6075 	bra.b		case3_complete		# bra leaves the condition codes untouched
6076 
6077 #
6078 # case (d1 == 65)
6079 #
6080 #	---------------------------------------------------------
6081 #	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6082 #	---------------------------------------------------------
6083 #	<-------(32)------>
6084 #	\		   \
6085 #	 \		    \
6086 #	  \		     \
6087 #	   \		      ------------------------------
6088 #	    --------------------------------		    \
6089 #					    \		     \
6090 #					     \		      \
6091 #					      \		       \
6092 #					       <-------(31)----->
6093 #	---------------------------------------------------------
6094 #	|0...............0|0................0|0rs		|
6095 #	---------------------------------------------------------
6096 #
6097 case3_65:
6098 	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
6099 	and.l		&0x80000000, %d0	# extract R bit
6100 	lsr.l		&0x1, %d0		# shift high bit into R bit
6101 	and.l		&0x7fffffff, %d1	# extract other bits
6102 
6103 case3_complete:
6104 # last operation done was an "and" of the bits shifted off so the condition
6105 # codes are already set so branch accordingly.
6106 	bne.b		case3_set_sticky	# yes; go set new sticky
6107 	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
6108 	bne.b		case3_set_sticky	# yes; go set new sticky
6109 	tst.b		GRS(%a6)		# were any bits shifted off?
6110 	bne.b		case3_set_sticky	# yes; go set new sticky
6111 
6112 #
6113 # no set bits were shifted off so don't set the sticky bit.
6114 # the guard and round bits extracted above stay in d0 and
6115 # the entire mantissa is zero.
6116 #
6117 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6118 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6119 	rts
6120 
6121 #
6122 # some bits were shifted off so set the sticky bit.
6123 # the entire mantissa is zero.
6124 #
6125 case3_set_sticky:
6126 	bset		&rnd_stky_bit,%d0	# set new sticky bit
6127 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6128 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6129 	rts
6130 
6131 #########################################################################
6132 # XDEF ****************************************************************	#
6133 #	_round(): round result according to precision/mode		#
6134 #									#
6135 # XREF ****************************************************************	#
6136 #	None								#
6137 #									#
6138 # INPUT ***************************************************************	#
6139 #	a0	  = ptr to input operand in internal extended format	#
6140 #	d1(hi)    = contains rounding precision:			#
6141 #			ext = $0000xxxx					#
6142 #			sgl = $0004xxxx					#
6143 #			dbl = $0008xxxx					#
6144 #	d1(lo)	  = contains rounding mode:				#
6145 #			RN  = $xxxx0000					#
6146 #			RZ  = $xxxx0001					#
6147 #			RM  = $xxxx0002					#
6148 #			RP  = $xxxx0003					#
6149 #	d0{31:29} = contains the g,r,s bits (extended)			#
6150 #									#
6151 # OUTPUT **************************************************************	#
6152 #	a0 = pointer to rounded result					#
6153 #									#
6154 # ALGORITHM ***********************************************************	#
6155 #	On return the value pointed to by a0 is correctly rounded	#
6156 #	and a0 is preserved; d0 and a1 are scratch (not cleared).	#
6157 #	The result is not typed - the tag field is invalid.  The	#
6158 #	result is still in the internal extended format.		#
6159 #									#
6160 #	The INEX bit of USER_FPSR will be set if the rounded result was	#
6161 #	inexact (i.e. if any of the g-r-s bits were set).		#
6162 #									#
6163 #########################################################################
6164 
6165 	global		_round
6166 _round:
6167 #
6168 # ext_grs() looks at the rounding precision and sets the appropriate
6169 # G,R,S bits in d0; it leaves d1 and a0 as they were.
6170 # If (G,R,S == 0) then result is exact and only needs truncating, else
6171 # set the inex2/ainex flags in USER_FPSR and round by mode.
6172 #
6173 	bsr.l		ext_grs			# extract G,R,S
6174 
6175 	tst.l		%d0			# are G,R,S zero?
6176 	beq.w		truncate		# yes; exact, just mask to prec
6177 
6178 	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6179 
6180 #
6181 # Use the rounding mode in d1.w as an index into a table of 16-bit
6182 # offsets from tbl_mode. All of the following assumes grs != 0.
6183 #
6184 	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6185 	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
6186 
6187 tbl_mode:
6188 	short		rnd_near - tbl_mode	# RN
6189 	short		truncate - tbl_mode	# RZ always truncates
6190 	short		rnd_mnus - tbl_mode	# RM
6191 	short		rnd_plus - tbl_mode	# RP
6192 
6193 #################################################################
6194 #	ROUND PLUS INFINITY					#
6195 #								#
6196 #	If sign of fp number = 0 (positive), then add 1 to l.	#
6197 #################################################################
6198 rnd_plus:
6199 	tst.b		FTEMP_SGN(%a0)		# check for sign
6200 	bmi.w		truncate		# if negative then truncate
6201 
6202 	mov.l		&0xffffffff, %d0	# all f's: add_* never clears l
6203 	swap		%d1			# set up d1 for round prec.
6204 
6205 	cmpi.b		%d1, &s_mode		# is prec = sgl?
6206 	beq.w		add_sgl			# yes
6207 	bgt.w		add_dbl			# no; it's dbl
6208 	bra.w		add_ext			# no; it's ext
6209 
6210 #################################################################
6211 #	ROUND MINUS INFINITY					#
6212 #								#
6213 #	If sign of fp number = 1 (negative), then add 1 to l.	#
6214 #################################################################
6215 rnd_mnus:
6216 	tst.b		FTEMP_SGN(%a0)		# check for sign
6217 	bpl.w		truncate		# if positive then truncate
6218 
6219 	mov.l		&0xffffffff, %d0	# all f's: add_* never clears l
6220 	swap		%d1			# set up d1 for round prec.
6221 
6222 	cmpi.b		%d1, &s_mode		# is prec = sgl?
6223 	beq.w		add_sgl			# yes
6224 	bgt.w		add_dbl			# no; it's dbl
6225 	bra.w		add_ext			# no; it's ext
6226 
6227 #################################################################
6228 #	ROUND NEAREST						#
6229 #								#
6230 #	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
6231 #	Note that this will round to even in case of a tie.	#
6232 #################################################################
6233 rnd_near:
6234 	asl.l		&0x1, %d0		# shift g-bit to c-bit; d0 = r,s
6235 	bcc.w		truncate		# g=0; below half so truncate
6236 
6237 	swap		%d1			# set up d1 for round prec.
6238 
6239 	cmpi.b		%d1, &s_mode		# is prec = sgl?
6240 	beq.w		add_sgl			# yes
6241 	bgt.w		add_dbl			# no; it's dbl
6242 	bra.w		add_ext			# no; it's ext
6243 
6244 # *** LOCAL EQUATES ***
6245 set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
6246 set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
6247 
6248 #########################
6249 #	ADD SINGLE	#
6250 #########################
6251 add_sgl:
6252 	add.l		&ad_1_sgl, FTEMP_HI(%a0) # add 1 to sgl l-bit
6253 	bcc.b		scc_clr			# no mantissa overflow
6254 	roxr.w		FTEMP_HI(%a0)		# carry out: rotate it back in
6255 	roxr.w		FTEMP_HI+2(%a0)		# through both words of hi(man)
6256 	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
6257 scc_clr:
6258 	tst.l		%d0			# test for rs = 0
6259 	bne.b		sgl_done		# not a tie; keep the l-bit
6260 	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
6261 sgl_done:
6262 	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6263 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6264 	rts
6265 
6266 #########################
6267 #	ADD EXTENDED	#
6268 #########################
6269 add_ext:
6270 	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
6271 	bcc.b		xcc_clr			# test for carry out
6272 	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
6273 	bcc.b		xcc_clr			# no mantissa overflow
6274 	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
6275 	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
6276 	roxr.w		FTEMP_LO(%a0)		# continue the 1-bit rotate
6277 	roxr.w		FTEMP_LO+2(%a0)		# through lo(man)
6278 	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
6279 xcc_clr:
6280 	tst.l		%d0			# test rs = 0
6281 	bne.b		add_ext_done		# not a tie; keep the l-bit
6282 	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
6283 add_ext_done:
6284 	rts
6285 
6286 #########################
6287 #	ADD DOUBLE	#
6288 #########################
6289 add_dbl:
6290 	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6291 	bcc.b		dcc_clr			# no carry
6292 	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
6293 	bcc.b		dcc_clr			# no carry
6294 
6295 	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
6296 	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
6297 	roxr.w		FTEMP_LO(%a0)		# continue the 1-bit rotate
6298 	roxr.w		FTEMP_LO+2(%a0)		# through lo(man)
6299 	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
6300 dcc_clr:
6301 	tst.l		%d0			# test for rs = 0
6302 	bne.b		dbl_done		# not a tie; keep the l-bit
6303 	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit
6304 
6305 dbl_done:
6306 	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6307 	rts
6308 
6309 ###########################
6310 # Truncate all other bits #
6311 ###########################
6312 truncate:
6313 	swap		%d1			# select rnd prec
6314 
6315 	cmpi.b		%d1, &s_mode		# is prec sgl?
6316 	beq.w		sgl_done		# yes; mask to sgl and return
6317 	bgt.b		dbl_done		# no; it's dbl
6318 	rts					# no; it's ext
6319 
6320 
6321 #
6322 # ext_grs(): extract guard, round and sticky bits according to
6323 #	     rounding precision.
6324 #
6325 # INPUT
6326 #	d0	   = extended precision g,r,s (in d0{31:29})
6327 #	d1	   = {PREC,ROUND}
6328 # OUTPUT
6329 #	d0{31:29}  = guard, round, sticky
6330 #
6331 # The ext_grs extract the guard/round/sticky bits according to the
6332 # selected rounding precision. It is called by the round subroutine
6333 # only.  All registers except d0 are kept intact. d0 becomes an
6334 # updated guard,round,sticky in d0{31:29}
6335 #
6336 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337 #	 prior to usage, and needs to restore d1 to original. this
6338 #	 routine is tightly tied to the round routine and not meant to
6339 #	 uphold standard subroutine calling practices.
6340 #
6341 
6342 ext_grs:
6343 	swap		%d1			# have d1.w point to round precision
6344 	tst.b		%d1			# is rnd prec = extended?
6345 	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
6346 
6347 #
6348 # %d0 actually already hold g,r,s since _round() had it before calling
6349 # this function. so, as long as we don't disturb it, we are "returning" it.
6350 #
6351 ext_grs_ext:
6352 	swap		%d1			# yes; return to correct positions
6353 	rts
6354 
6355 ext_grs_not_ext:
6356 	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
6357 
6358 	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
6359 	bne.b		ext_grs_dbl		# no; go handle dbl
6360 
6361 #
6362 # sgl:
6363 #	96		64	  40	32		0
6364 #	-----------------------------------------------------
6365 #	| EXP	|XXXXXXX|	  |xx	|		|grs|
6366 #	-----------------------------------------------------
6367 #			<--(24)--->nn\			   /
6368 #				   ee ---------------------
6369 #				   ww		|
6370 #						v
6371 #				   gr	   new sticky
6372 #
6373 ext_grs_sgl:
6374 	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6375 	mov.l		&30, %d2		# of the sgl prec. limits
6376 	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
6377 	mov.l		FTEMP_HI(%a0), %d2	# get hi(man) for s-bit test
6378 	and.l		&0x0000003f, %d2	# s bit is the or of all other
6379 	bne.b		ext_grs_st_stky		# bits to the right of g-r
6380 	tst.l		FTEMP_LO(%a0)		# test lower mantissa
6381 	bne.b		ext_grs_st_stky		# if any are set, set sticky
6382 	tst.l		%d0			# test original g,r,s
6383 	bne.b		ext_grs_st_stky		# if any are set, set sticky
6384 	bra.b		ext_grs_end_sd		# nothing below g-r set; no sticky
6385 
6386 #
6387 # dbl:
6388 #	96		64		32	 11	0
6389 #	-----------------------------------------------------
6390 #	| EXP	|XXXXXXX|		|	 |xx	|grs|
6391 #	-----------------------------------------------------
6392 #						  nn\	    /
6393 #						  ee -------
6394 #						  ww	|
6395 #							v
6396 #						  gr	new sticky
6397 #
6398 ext_grs_dbl:
6399 	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6400 	mov.l		&30, %d2		# of the dbl prec. limits
6401 	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
6402 	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
6403 	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
6404 	bne.b		ext_grs_st_stky		# other bits to the right of g-r
6405 	tst.l		%d0			# test original g,r,s
6406 	bne.b		ext_grs_st_stky		# if any are set, set sticky
6407 	bra.b		ext_grs_end_sd		# if clear, exit
6408 
6409 ext_grs_st_stky:
6410 	bset		&rnd_stky_bit, %d3	# set sticky bit
6411 ext_grs_end_sd:
6412 	mov.l		%d3, %d0		# return grs to d0
6413 
6414 	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
6415 
6416 	swap		%d1			# restore d1 to original
6417 	rts
6418 
6419 #########################################################################
6420 # norm(): normalize the mantissa of an extended precision input. an	#
6421 #	  already normalized input is left unchanged (d0 = 0).		#
6422 #									#
6423 # XDEF ****************************************************************	#
6424 #	norm()								#
6425 #									#
6426 # XREF **************************************************************** #
6427 #	none								#
6428 #									#
6429 # INPUT *************************************************************** #
6430 #	a0 = pointer fp extended precision operand to normalize		#
6431 #									#
6432 # OUTPUT ************************************************************** #
6433 #	d0 = number of bit positions the mantissa was shifted		#
6434 #	a0 = the input operand's mantissa is normalized; the exponent	#
6435 #	     is unchanged.						#
6436 #	d1 is clobbered; d2/d3 are preserved.				#
6437 #########################################################################
6438 	global		norm
6439 norm:
6440 	mov.l		%d2, -(%sp)		# create some temp regs
6441 	mov.l		%d3, -(%sp)
6442 
6443 	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
6444 	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
6445 
6446 	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
6447 	beq.b		norm_lo			# hi(man) is all zeroes!
6448 # d2 can be 0; a bfextu register width of 0 would mean 32, so use rotates.
6449 norm_hi:
6450 	lsl.l		%d2, %d0		# left shift hi(man)
6451 	mov.l		%d1, %d3		# copy old lo(man)
6452 	rol.l		%d2, %d3		# top d2 bits wrap to the bottom
6453 	lsl.l		%d2, %d1		# create lo(man)
6454 	eor.l		%d1, %d3		# keep only the wrapped bits
6455 	or.l		%d3, %d0		# create hi(man)
6456 	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
6457 	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
6458 
6459 	mov.l		%d2, %d0		# return shift amount
6460 
6461 	mov.l		(%sp)+, %d3		# restore temp regs
6462 	mov.l		(%sp)+, %d2
6463 
6464 	rts
6465 
6466 norm_lo:
6467 	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
6468 	lsl.l		%d2, %d1		# shift lo(man)
6469 	add.l		&32, %d2		# add 32 to shft amount
6470 
6471 	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
6472 	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
6473 
6474 	mov.l		%d2, %d0		# return shift amount
6475 
6476 	mov.l		(%sp)+, %d3		# restore temp regs
6477 	mov.l		(%sp)+, %d2
6478 
6479 	rts
6480 
6481 #########################################################################
6482 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
6483 #		- returns corresponding optype tag			#
6484 #									#
6485 # XDEF ****************************************************************	#
6486 #	unnorm_fix()							#
6487 #									#
6488 # XREF **************************************************************** #
6489 #	norm() - normalize the mantissa					#
6490 #									#
6491 # INPUT *************************************************************** #
6492 #	a0 = pointer to unnormalized extended precision number		#
6493 #									#
6494 # OUTPUT ************************************************************** #
6495 #	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
6496 #	a0 = input operand has been converted to a norm, denorm, or	#
6497 #	     zero; both the exponent and mantissa are changed.		#
6498 #									#
6499 #########################################################################
6500 
6501 	global		unnorm_fix
6502 unnorm_fix:
6503 	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6504 	bne.b		unnorm_shift		# hi(man) is not all zeroes
6505 
6506 #
6507 # hi(man) is all zeroes so see if any bits in lo(man) are set
6508 #
6509 unnorm_chk_lo:
6510 	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6511 	beq.w		unnorm_zero		# yes
6512 
6513 	add.w		&32, %d0		# no; fix shift distance
6514 
6515 #
6516 # d0 = # shifts needed for complete normalization
6517 #
6518 unnorm_shift:
6519 	clr.l		%d1			# clear top word
6520 	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
6521 	and.w		&0x7fff, %d1		# strip off sgn
6522 
6523 	cmp.w		%d0, %d1		# would full norm push exp < 0?
6524 	bgt.b		unnorm_nrm_zero		# yes; shift only until exp = 0
6525 
6526 #
6527 # exponent would not go < 0, so the mantissa can be fully normalized
6528 #
6529 	sub.w		%d0, %d1		# shift exponent value
6530 	mov.w		FTEMP_EX(%a0), %d0	# load old {sgn,exp}
6531 	and.w		&0x8000, %d0		# save old sign
6532 	or.w		%d0, %d1		# {sgn,new exp}
6533 	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
6534 
6535 	bsr.l		norm			# normalize UNNORM
6536 
6537 	mov.b		&NORM, %d0		# return new optype tag
6538 	rts
6539 
6540 #
6541 # exponent would go < 0, so only shift left by the old exponent (d1)
6542 #
6543 unnorm_nrm_zero:
6544 	cmp.b		%d1, &32		# is exp <= 32?
6545 	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
6546 
6547 	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6548 	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
6549 
6550 	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
6551 	lsl.l		%d1, %d0		# shift left by old exponent
6552 	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
6553 
6554 	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
6555 
6556 	mov.b		&DENORM, %d0		# return new optype tag
6557 	rts
6558 
6559 #
6560 # only mantissa bits set are in lo(man)
6561 #
6562 unnorm_nrm_zero_lrg:
6563 	sub.w		&32, %d1		# adjust shft amt by 32
6564 
6565 	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
6566 	lsl.l		%d1, %d0		# left shift lo(man)
6567 
6568 	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
6569 	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
6570 
6571 	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
6572 
6573 	mov.b		&DENORM, %d0		# return new optype tag
6574 	rts
6575 
6576 #
6577 # whole mantissa is zero so this UNNORM is actually a zero
6578 #
6579 unnorm_zero:
6580 	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero
6581 
6582 	mov.b		&ZERO, %d0		# fix optype tag
6583 	rts
6584 
6585 #########################################################################
6586 # XDEF ****************************************************************	#
6587 #	set_tag_x(): return the optype of the input ext fp number	#
6588 #									#
6589 # XREF ****************************************************************	#
6590 #	None								#
6591 #									#
6592 # INPUT ***************************************************************	#
6593 #	a0 = pointer to extended precision operand			#
6594 #									#
6595 # OUTPUT **************************************************************	#
6596 #	d0 = value of type tag						#
6597 #		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
6598 #									#
6599 # ALGORITHM ***********************************************************	#
6600 #	Simply test the exponent, j-bit, and mantissa values to		#
6601 # determine the type of operand.					#
6602 #	If it's an unnormalized zero, alter the operand and force it	#
6603 # to be a normal zero.							#
6604 #									#
6605 #########################################################################
6606 
6607 	global		set_tag_x
6608 set_tag_x:
6609 	mov.w		FTEMP_EX(%a0), %d0	# fetch {sgn,exp} word
6610 	and.w		&0x7fff, %d0		# drop the sign bit
6611 	cmpi.w		%d0, &0x7fff		# is exponent all ones?
6612 	beq.b		inf_or_nan_x		# yes; inf or nan
6613 not_inf_or_nan_x:
6614 	btst		&0x7,FTEMP_HI(%a0)	# is msb of mantissa set?
6615 	bne.b		is_norm_x		# yes; it's a norm
6616 not_norm_x:
6617 	tst.w		%d0			# is exponent = 0?
6618 	bne.b		is_unnorm_x		# no; unnorm candidate
6619 not_unnorm_x:
6620 	tst.l		FTEMP_HI(%a0)		# any bits in hi(man)?
6621 	bne.b		is_denorm_x		# yes
6622 	tst.l		FTEMP_LO(%a0)		# any bits in lo(man)?
6623 	beq.b		is_zero_x		# no; mantissa is all zero
6624 is_denorm_x:
6625 	mov.b		&DENORM, %d0
6626 	rts
6627 is_zero_x:
6628 	mov.b		&ZERO, %d0
6629 	rts
6630 is_norm_x:
6631 	mov.b		&NORM, %d0
6632 	rts
6633 # exponent is non-zero but the msb of the mantissa is clear. an
6634 # "unnormalized zero" (all-zero mantissa) must be converted to zero.
6635 is_unnorm_x:
6636 	tst.l		FTEMP_HI(%a0)		# any bits in hi(man)?
6637 	bne.b		is_unnorm_reg_x		# yes; a real unnorm
6638 	tst.l		FTEMP_LO(%a0)		# any bits in lo(man)?
6639 	bne.b		is_unnorm_reg_x		# yes; a real unnorm
6640 # it's an "unnormalized zero": clear the exponent (the sign is kept)
6641 # and tag the operand as an actual zero.
6642 	andi.w		&0x8000,FTEMP_EX(%a0)
6643 	bra.b		is_zero_x
6644 is_unnorm_reg_x:
6645 	mov.b		&UNNORM, %d0
6646 	rts
6647 inf_or_nan_x:
6648 	tst.l		FTEMP_LO(%a0)		# any bits in lo(man)?
6649 	bne.b		is_nan_x		# yes; nan
6650 	mov.l		FTEMP_HI(%a0), %d0
6651 	and.l		&0x7fffffff, %d0	# msb is a don't care!
6652 	bne.b		is_nan_x		# other hi(man) bits set; nan
6653 is_inf_x:
6654 	mov.b		&INF, %d0
6655 	rts
6656 is_nan_x:
6657 	btst		&0x6, FTEMP_HI(%a0)	# bit below the msb picks the nan type
6658 	beq.b		is_snan_x		# clear; signalling nan
6659 	mov.b		&QNAN, %d0
6660 	rts
6661 is_snan_x:
6662 	mov.b		&SNAN, %d0
6663 	rts
6664 
6665 #########################################################################
6666 # XDEF ****************************************************************	#
6667 #	set_tag_d(): return the optype of the input dbl fp number	#
6668 #									#
6669 # XREF ****************************************************************	#
6670 #	None								#
6671 #									#
6672 # INPUT ***************************************************************	#
6673 #	a0 = points to double precision operand				#
6674 #									#
6675 # OUTPUT **************************************************************	#
6676 #	d0 = value of type tag						#
6677 #		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
6678 #									#
6679 # ALGORITHM ***********************************************************	#
6680 #	Simply test the exponent, j-bit, and mantissa values to		#
6681 # determine the type of operand.					#
6682 #									#
6683 #########################################################################
6684 
6685 	global		set_tag_d
6686 set_tag_d:
6687 	mov.l		FTEMP(%a0), %d1		# d1 = first longword of operand
6688 	mov.l		%d1, %d0		# d0 = copy for the exponent test
6689 
6690 	and.l		&0x7ff00000, %d0	# isolate the exponent field
6691 	beq.b		zero_or_denorm_d	# exp = 0; zero or denorm
6692 
6693 	cmpi.l		%d0, &0x7ff00000	# is exponent all ones?
6694 	beq.b		inf_or_nan_d		# yes; inf or nan
6695 
6696 is_norm_d:
6697 	mov.b		&NORM, %d0
6698 	rts
6699 zero_or_denorm_d:
6700 	and.l		&0x000fffff, %d1	# upper fraction bits set?
6701 	bne.b		is_denorm_d		# yes; denorm
6702 	tst.l		FTEMP+4(%a0)		# lower fraction bits set?
6703 	beq.b		is_zero_d		# no; it's a zero
6704 is_denorm_d:
6705 	mov.b		&DENORM, %d0
6706 	rts
6707 is_zero_d:
6708 	mov.b		&ZERO, %d0
6709 	rts
6710 inf_or_nan_d:
6711 	and.l		&0x000fffff, %d1	# upper fraction bits set?
6712 	bne.b		is_nan_d		# yes; nan
6713 	tst.l		FTEMP+4(%a0)		# lower fraction bits set?
6714 	beq.b		is_inf_d		# no; it's an infinity
6715 is_nan_d:
6716 	btst		&19, %d1		# top fraction bit picks nan type
6717 	beq.b		is_snan_d		# clear; signalling nan
6718 is_qnan_d:
6719 	mov.b		&QNAN, %d0
6720 	rts
6721 is_snan_d:
6722 	mov.b		&SNAN, %d0
6723 	rts
6724 is_inf_d:
6725 	mov.b		&INF, %d0
6726 	rts
6727 
6728 #########################################################################
6729 # XDEF ****************************************************************	#
6730 #	set_tag_s(): return the optype of the input sgl fp number	#
6731 #									#
6732 # XREF ****************************************************************	#
6733 #	None								#
6734 #									#
6735 # INPUT ***************************************************************	#
6736 #	a0 = pointer to single precision operand			#
6737 #									#
6738 # OUTPUT **************************************************************	#
6739 #	d0 = value of type tag						#
6740 #		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
6741 #									#
6742 # ALGORITHM ***********************************************************	#
6743 #	Simply test the exponent, j-bit, and mantissa values to		#
6744 # determine the type of operand.					#
6745 #									#
6746 #########################################################################
6747 
6748 	global		set_tag_s
6749 set_tag_s:
6750 	mov.l		FTEMP(%a0), %d1		# d1 = the sgl operand
6751 	mov.l		%d1, %d0		# d0 = copy for the exponent test
6752 
6753 	and.l		&0x7f800000, %d0	# isolate the exponent field
6754 	beq.b		zero_or_denorm_s	# exp = 0; zero or denorm
6755 
6756 	cmpi.l		%d0, &0x7f800000	# is exponent all ones?
6757 	beq.b		inf_or_nan_s		# yes; inf or nan
6758 
6759 is_norm_s:
6760 	mov.b		&NORM, %d0
6761 	rts
6762 zero_or_denorm_s:
6763 	and.l		&0x007fffff, %d1	# any fraction bits set?
6764 	beq.b		is_zero_s		# no; it's a zero
6765 is_denorm_s:
6766 	mov.b		&DENORM, %d0
6767 	rts
6768 is_zero_s:
6769 	mov.b		&ZERO, %d0
6770 	rts
6771 inf_or_nan_s:
6772 	and.l		&0x007fffff, %d1	# any fraction bits set?
6773 	beq.b		is_inf_s		# no; it's an infinity
6774 is_nan_s:
6775 	btst		&22, %d1		# top fraction bit picks nan type
6776 	beq.b		is_snan_s		# clear; signalling nan
6777 is_qnan_s:
6778 	mov.b		&QNAN, %d0
6779 	rts
6780 is_snan_s:
6781 	mov.b		&SNAN, %d0
6782 	rts
6783 is_inf_s:
6784 	mov.b		&INF, %d0
6785 	rts
6786 
6787 #########################################################################
6788 # XDEF ****************************************************************	#
6789 #	unf_res(): routine to produce default underflow result of a	#
6790 #		   scaled extended precision number; this is used by	#
6791 #		   fadd/fdiv/fmul/etc. emulation routines.		#
6792 #	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
6793 #		    single round prec and extended prec mode.		#
6794 #									#
6795 # XREF ****************************************************************	#
6796 #	_denorm() - denormalize according to scale factor		#
6797 #	_round() - round denormalized number according to rnd prec	#
6798 #									#
6799 # INPUT ***************************************************************	#
6800 #	a0 = pointer to extended precison operand			#
6801 #	d0 = scale factor						#
6802 #	d1 = rounding precision/mode					#
6803 #									#
6804 # OUTPUT **************************************************************	#
6805 #	a0 = pointer to default underflow result in extended precision	#
6806 #	d0.b = result FPSR_cc which caller may or may not want to save	#
6807 #									#
6808 # ALGORITHM ***********************************************************	#
6809 #	Convert the input operand to "internal format" which means the	#
6810 # exponent is extended to 16 bits and the sign is stored in the unused	#
6811 # portion of the extended precison operand. Denormalize the number	#
6812 # according to the scale factor passed in d0. Then, round the		#
6813 # denormalized result.							#
6814 #	Set the FPSR_exc bits as appropriate but return the cc bits in	#
6815 # d0 in case the caller doesn't want to save them (as is the case for	#
6816 # fmove out).								#
6817 #	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
6818 # precision and the rounding precision to single.			#
6819 #									#
6820 #########################################################################
6821 	global		unf_res
6822 unf_res:
6823 	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
6824 
6825 	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
6826 	sne		FTEMP_SGN(%a0)		# SGN byte = 0xff if sign set
6827 
6828 	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
6829 	and.w		&0x7fff, %d1		# strip off sign
6830 	sub.w		%d0, %d1		# subtract scale factor
6831 	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
6832 
6833 	mov.l		%a0, -(%sp)		# save operand ptr during calls
6834 
6835 	mov.l		0x4(%sp),%d0		# pass rnd prec.
6836 	andi.w		&0x00c0,%d0		# isolate prec field (bits 7:6)
6837 	lsr.w		&0x4,%d0		# d0.w = prec in bits 3:2
6838 	bsr.l		_denorm			# denorm result
6839 
6840 	mov.l		(%sp),%a0		# reload saved operand ptr
6841 	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
6842 	andi.w		&0xc0,%d1		# extract rnd prec
6843 	lsr.w		&0x4,%d1		# prec now in bits 3:2
6844 	swap		%d1			# move prec to hi(d1)
6845 	mov.w		0x6(%sp),%d1		# reload prec:mode
6846 	andi.w		&0x30,%d1		# extract rnd mode
6847 	lsr.w		&0x4,%d1		# lo(d1) = rnd mode (0-3)
6848 	bsr.l		_round			# round the denorm
6849 
6850 	mov.l		(%sp)+, %a0		# pop saved operand ptr
6851 
6852 # result is now rounded properly. convert back to normal format
6853 	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
6854 	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
6855 	beq.b		unf_res_chkifzero	# no; result is positive
6856 	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
6857 	clr.b		FTEMP_SGN(%a0)		# clear temp sign
6858 
6859 # the number may have become zero after rounding. set ccodes accordingly.
6860 unf_res_chkifzero:
6861 	clr.l		%d0			# start with no ccode bits set
6862 	tst.l		FTEMP_HI(%a0)		# is value now a zero?
6863 	bne.b		unf_res_cont		# no
6864 	tst.l		FTEMP_LO(%a0)		# check lo(man) as well
6865 	bne.b		unf_res_cont		# no
6866 #	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
6867 	bset		&z_bit, %d0		# yes; set zero ccode bit
6868 
6869 unf_res_cont:
6870 
6871 #
6872 # can inex1 also be set along with unfl and inex2???
6873 #
6874 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6875 #
6876 	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6877 	beq.b		unf_res_end		# no
6878 	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6879 
6880 unf_res_end:
6881 	add.l		&0x4, %sp		# drop the saved prec,mode long
6882 	rts
6883 
6884 # unf_res() for fsglmul() and fsgldiv().
6885 	global		unf_res4
6886 unf_res4:
6887 	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
6888 
6889 	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
6890 	sne		FTEMP_SGN(%a0)		# SGN byte = 0xff if sign set
6891 
6892 	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
6893 	and.w		&0x7fff,%d1		# strip off sign
6894 	sub.w		%d0,%d1			# subtract scale factor
6895 	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
6896 
6897 	mov.l		%a0,-(%sp)		# save operand ptr during calls
6898 
6899 	clr.l		%d0			# force rnd prec = ext
6900 	bsr.l		_denorm			# denorm result
6901 
6902 	mov.l		(%sp),%a0		# reload saved operand ptr
6903 	mov.w		&s_mode,%d1		# force rnd prec = sgl
6904 	swap		%d1			# move prec to hi(d1)
6905 	mov.w		0x6(%sp),%d1		# load rnd mode
6906 	andi.w		&0x30,%d1		# extract rnd mode
6907 	lsr.w		&0x4,%d1		# lo(d1) = rnd mode (0-3)
6908 	bsr.l		_round			# round the denorm
6909 
6910 	mov.l		(%sp)+,%a0		# pop saved operand ptr
6911 
6912 # result is now rounded properly. convert back to normal format
6913 	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
6914 	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
6915 	beq.b		unf_res4_chkifzero	# no; result is positive
6916 	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
6917 	clr.b		FTEMP_SGN(%a0)		# clear temp sign
6918 
6919 # the number may have become zero after rounding. set ccodes accordingly.
6920 unf_res4_chkifzero:
6921 	clr.l		%d0			# start with no ccode bits set
6922 	tst.l		FTEMP_HI(%a0)		# is value now a zero?
6923 	bne.b		unf_res4_cont		# no
6924 	tst.l		FTEMP_LO(%a0)		# check lo(man) as well
6925 	bne.b		unf_res4_cont		# no
6926 #	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
6927 	bset		&z_bit,%d0		# yes; set zero ccode bit
6928 
6929 unf_res4_cont:
6930 
6931 #
6932 # can inex1 also be set along with unfl and inex2???
6933 #
6934 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6935 #
6936 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6937 	beq.b		unf_res4_end		# no
6938 	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6939 
6940 unf_res4_end:
6941 	add.l		&0x4,%sp		# drop the saved prec,mode long
6942 	rts
6943 
6944 #########################################################################
6945 # XDEF ****************************************************************	#
6946 #	ovf_res(): routine to produce the default overflow result of	#
6947 #		   an overflowing number.				#
6948 #	ovf_res2(): same as above but the rnd mode/prec are passed	#
6949 #		    differently.					#
6950 #									#
6951 # XREF ****************************************************************	#
6952 #	none								#
6953 #									#
6954 # INPUT ***************************************************************	#
6955 #	d1.b	= '-1' => (-); '0' => (+)				#
6956 #   ovf_res():								#
6957 #	d0	= rnd mode/prec						#
6958 #   ovf_res2():								#
6959 #	hi(d0)	= rnd prec						#
6960 #	lo(d0)	= rnd mode						#
6961 #									#
6962 # OUTPUT **************************************************************	#
6963 #	a0	= points to extended precision result			#
6964 #	d0.b	= condition code bits					#
6965 #									#
6966 # ALGORITHM ***********************************************************	#
6967 #	The default overflow result can be determined by the sign of	#
6968 # the result and the rounding mode/prec in effect. These bits are	#
6969 # concatenated together to create an index into the default result	#
6970 # table. A pointer to the correct result is returned in a0. The		#
6971 # resulting condition codes are returned in d0 in case the caller	#
6972 # doesn't want FPSR_cc altered (as is the case for fmove out).		#
6973 #									#
6974 #########################################################################
6975 
6976 	global		ovf_res
6977 ovf_res:
6978 	andi.w		&0x10,%d1		# keep result sign
6979 	lsr.b		&0x4,%d0		# shift prec/mode to bits 3:0
6980 	or.b		%d0,%d1			# concat the two
6981 	mov.w		%d1,%d0			# make a copy
6982 	lsl.b		&0x1,%d1		# multiply d1 by 2
6983 	bra.b		ovf_res_load
6984 
6985 	global		ovf_res2
6986 ovf_res2:
6987 	and.w		&0x10, %d1		# keep result sign
6988 	or.b		%d0, %d1		# insert rnd mode
6989 	swap		%d0			# bring rnd prec into lo(d0)
6990 	or.b		%d0, %d1		# insert rnd prec
6991 	mov.w		%d1, %d0		# make a copy
6992 	lsl.b		&0x1, %d1		# shift left by 1
6993 
6994 #
6995 # use the rounding mode, precision, and result sign as in index into the
6996 # two tables below to fetch the default result and the result ccodes.
6997 #
6998 ovf_res_load:
6999 	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7000 	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
7001 
7002 	rts
7003 
7004 tbl_ovfl_cc:
7005 	byte		0x2, 0x0, 0x0, 0x2	# same row order as tbl_ovfl_result
7006 	byte		0x2, 0x0, 0x0, 0x2	# 0x2 appears where result is INF
7007 	byte		0x2, 0x0, 0x0, 0x2	# columns are RN, RZ, RM, RP
7008 	byte		0x0, 0x0, 0x0, 0x0	# pairs with the all-zero result row
7009 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # 0x8 added on negative rows
7010 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
7011 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
7012 
7013 tbl_ovfl_result:
7014 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7015 	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7016 	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7017 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7018 
7019 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7020 	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7021 	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7022 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7023 
7024 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025 	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7026 	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7027 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7028 
7029 	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler row;
7030 	long		0x00000000,0x00000000,0x00000000,0x00000000 # keeps the (-)
7031 	long		0x00000000,0x00000000,0x00000000,0x00000000 # rows at index
7032 	long		0x00000000,0x00000000,0x00000000,0x00000000 # 0x10 and up
7033 
7034 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7035 	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7036 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7037 	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7038 
7039 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7040 	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7041 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7042 	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7043 
7044 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045 	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7046 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047 	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048 
7049 #########################################################################
7050 # XDEF ****************************************************************	#
7051 #	fout(): move from fp register to memory or data register	#
7052 #									#
7053 # XREF ****************************************************************	#
7054 #	_round() - needed to create EXOP for sgl/dbl precision		#
7055 #	norm() - needed to create EXOP for extended precision		#
7056 #	ovf_res() - create default overflow result for sgl/dbl precision#
7057 #	unf_res() - create default underflow result for sgl/dbl prec.	#
7058 #	dst_dbl() - create rounded dbl precision result.		#
7059 #	dst_sgl() - create rounded sgl precision result.		#
7060 #	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
7061 #	bindec() - convert FP binary number to packed number.		#
7062 #	_mem_write() - write data to memory.				#
7063 #	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064 #	_dmem_write_{byte,word,long}() - write data to memory.		#
7065 #	store_dreg_{b,w,l}() - store data to data register file.	#
7066 #	facc_out_{b,w,l,d,x}() - data access error occurred.		#
7067 #									#
7068 # INPUT ***************************************************************	#
7069 #	a0 = pointer to extended precision source operand		#
7070 #	d0 = round prec,mode						#
7071 #									#
7072 # OUTPUT **************************************************************	#
7073 #	fp0 : intermediate underflow or overflow result if		#
7074 #	      OVFL/UNFL occurred for a sgl or dbl operand		#
7075 #									#
7076 # ALGORITHM ***********************************************************	#
7077 #	This routine is accessed by many handlers that need to do an	#
7078 # opclass three move of an operand out to memory.			#
7079 #	Decode an fmove out (opclass 3) instruction to determine if	#
7080 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
7081 # register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, the EXOP is created and left in fp1.	#
7086 #	For extended precision, the stacked <ea> must be fixed along	#
7087 # w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
7088 # the source is a denorm and if underflow is enabled, an EXOP must be	#
7089 # created.								#
7090 #	For packed, the k-factor must be fetched from the instruction	#
7091 # word or a data register. The <ea> must be fixed as w/ extended	#
7092 # precision. Then, bindec() is called to create the appropriate		#
7093 # packed result.							#
7094 #	If at any time an access error is flagged by one of the move-	#
7095 # to-memory routines, then a special exit must be made so that the	#
7096 # access error can be handled properly.					#
7097 #									#
7098 #########################################################################
7099 
	global		fout
#
# fout: dispatch on the 3-bit destination format field extracted from
# EXC_CMDREG(a6). d0 and a0 are passed through untouched to the selected
# routine; d1 and a1 are used as scratch for the table lookup.
#
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # fetch 16-bit offset for this fmt
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg		&0x8
# offsets are relative to tbl_fout; entry order is the dst fmt encoding
tbl_fout:
	short		fout_long	-	tbl_fout	# fmt 0
	short		fout_sgl	-	tbl_fout	# fmt 1
	short		fout_ext	-	tbl_fout	# fmt 2
	short		fout_pack	-	tbl_fout	# fmt 3
	short		fout_word	-	tbl_fout	# fmt 4
	short		fout_dbl	-	tbl_fout	# fmt 5
	short		fout_byte	-	tbl_fout	# fmt 6
	short		fout_pack	-	tbl_fout	# fmt 7
7116 
7117 #################################################################
7118 # fmove.b out ###################################################
7119 #################################################################
7120 
7121 # Only "Unimplemented Data Type" exceptions enter here. The operand
7122 # is either a DENORM or a NORM.
#
# fout_byte: emulate a byte-sized fmove out.
#   in : a0 = ptr to extended precision source operand
#        d0 = rnd prec,mode (loaded into the FPCR unchanged)
#        STAG(a6) = source tag; zero means the operand is normalized
# The hardware "fmov.b" performs the conversion. The low word of the
# resulting FPSR is OR'd into the low word of USER_FPSR, and the byte is
# written either to Dn or to the address held in EXC_EA(a6).
#
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_byte_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_byte	# write byte

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes; branch (not call) to access error exit

	rts

# destination is a data register: its number is the low 3 bits of the opword
fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_b
	rts

# a DENORM source is replaced by a single precision stand-in that keeps
# only the original sign; the conversion above is then run on that value.
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_byte_norm
7162 
7163 #################################################################
7164 # fmove.w out ###################################################
7165 #################################################################
7166 
7167 # Only "Unimplemented Data Type" exceptions enter here. The operand
7168 # is either a DENORM or a NORM.
#
# fout_word: emulate a word-sized fmove out.
#   in : a0 = ptr to extended precision source operand
#        d0 = rnd prec,mode (loaded into the FPCR unchanged)
#        STAG(a6) = source tag; zero means the operand is normalized
# The hardware "fmov.w" performs the conversion. The low word of the
# resulting FPSR is OR'd into the low word of USER_FPSR, and the word is
# written either to Dn or to the address held in EXC_EA(a6).
#
fout_word:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_word_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_word_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_word_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_word	# write word

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes; branch (not call) to access error exit

	rts

# destination is a data register: its number is the low 3 bits of the opword
fout_word_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_w
	rts

# a DENORM source is replaced by a single precision stand-in that keeps
# only the original sign; the conversion above is then run on that value.
fout_word_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_word_norm
7208 
7209 #################################################################
7210 # fmove.l out ###################################################
7211 #################################################################
7212 
7213 # Only "Unimplemented Data Type" exceptions enter here. The operand
7214 # is either a DENORM or a NORM.
#
# fout_long: emulate a longword-sized fmove out.
#   in : a0 = ptr to extended precision source operand
#        d0 = rnd prec,mode (loaded into the FPCR unchanged)
#        STAG(a6) = source tag; zero means the operand is normalized
# The hardware "fmov.l" performs the conversion. The low word of the
# resulting FPSR is OR'd into the low word of USER_FPSR, and the longword
# is written either to Dn or to the address held in EXC_EA(a6).
#
fout_long:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_long_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_long_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

# d0 holds the longword to store from here on
fout_long_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_long_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes; branch (not call) to access error exit

	rts

# destination is a data register: its number is the low 3 bits of the opword
fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

# a DENORM source is replaced by a single precision stand-in that keeps
# only the original sign; the conversion above is then run on that value.
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_long_norm
7255 
7256 #################################################################
7257 # fmove.x out ###################################################
7258 #################################################################
7259 
7260 # Only "Unimplemented Data Type" exceptions enter here. The operand
7261 # is either a DENORM or a NORM.
7262 # The DENORM causes an Underflow exception.
#
# fout_ext: emulate an extended precision fmove out.
#   in : a0 = ptr to extended precision source operand
#        STAG(a6) = source tag; zero means the operand is normalized
# The operand is copied to FP_SCR0 with its 16-bit reserved field cleared,
# the destination address is obtained from _calc_ea_fout(), and 12 bytes
# are written from FP_SCR0. A non-normalized (DENORM) source additionally
# sets the underflow bit and, if UNFL or INEX is enabled, produces an EXOP
# in fp1.
#
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x		SRC(%a0),&0x80		# return result (source value in fp0)

	bsr.l		_calc_ea_fout		# fix stacked <ea>; dst addr comes back in a0

	mov.l		%a0,%a1			# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SCR0 for now and deal with it later...
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		fout_ext_exc		# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# build the EXOP: normalize the copy in FP_SCR0 and give it an exponent of
# -(shift amount) truncated to 15 bits, keeping the original sign.
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the mantissa
	neg.w		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

# common access-error exit for the 12-byte writes: restore the stacked a6
# from EXC_A6 and branch (not call) to the extended-size access error exit.
fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x
7330 
7331 #########################################################################
7332 # fmove.s out ###########################################################
7333 #########################################################################
#
# fout_sgl: emulate a single precision fmove out.
#   in : a0 = ptr to extended precision source operand
#        d0 = rnd prec,mode; only the mode bits (0x30) are kept and
#             single precision is forced in their place
# The combined prec/mode is saved in L_SCR3(a6) for the overflow,
# underflow and EXOP paths. The biased exponent is compared with SGL_HI
# and SGL_LO to choose between the definite-overflow, possible-overflow,
# underflow and in-range paths.
#
fout_sgl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&SGL_HI		# will operand overflow?
	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO		# will operand underflow?
	blt.w		fout_sgl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
# (also reached from fout_sgl_may_ovfl when rounding did not overflow;
# a0 must still point at the source operand.)
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.s		%fp0,%d0		# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex

# d0 holds the single precision bit pattern to store
fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes; branch (not call) to access error exit

	rts

# destination is a data register: its number is the low 3 bits of the opword
fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts
7389 
7390 #
7391 # here, we know that the operand would UNFL if moved out to single prec,
7392 # so, denorm and round and then use generic store single routine to
7393 # write the value to memory.
7394 #
#
# fout_sgl_unfl: the exponent is below SGL_LO, so the store underflows.
#   in : a0 = ptr to extended precision source operand
#        L_SCR3(a6) = sgl prec + rnd mode saved by fout_sgl
# Sets the UNFL exception bit, copies the operand to FP_SCR0 (normalizing
# it first when the source tag is DENORM), has unf_res() produce the
# default underflow result in place, converts it with dst_sgl() and writes
# the longword to Dn or memory. The source pointer is pushed so that
# fout_sd_exc_unfl can pop it when an EXOP is required; otherwise it is
# discarded before returning.
#
fout_sgl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr for fout_sd_exc_unfl

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_sgl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_sgl			# convert to single prec

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_unfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes; branch (not call) to access error exit

	bra.b		fout_sgl_unfl_chkexc

# destination is a data register: its number is the low 3 bits of the opword
fout_sgl_unfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

fout_sgl_unfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
	rts
7442 
7443 #
7444 # it's definitely an overflow so call ovf_res to get the correct answer
7445 #
#
# fout_sgl_ovfl: the operand is known to overflow single precision.
#   in : a0 = ptr to extended precision source operand
#        L_SCR3(a6) = sgl prec + rnd mode saved by fout_sgl
# Sets the overflow status bits in USER_FPSR (plus inex2 when the source
# has mantissa bits below single precision), stores the default overflow
# result selected by ovf_res() to Dn or memory and, when OVFL or INEX2 is
# enabled in the FPCR, continues at fout_sd_exc_ovfl to build the EXOP in
# fp1. The source pointer is pushed so that fout_sd_exc_ovfl can pop it;
# it is discarded here when no EXOP is needed.
#
fout_sgl_ovfl:
	tst.b		3+SRC_HI(%a0)		# is result inexact? (low byte of hi mantissa)
	bne.b		fout_sgl_ovfl_inex2
	tst.l		SRC_LO(%a0)		# is result inexact? (any lo mantissa bits)
	bne.b		fout_sgl_ovfl_inex2
	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr for fout_sd_exc_ovfl

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.s		%fp0,%d0		# store to single

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes; branch (not call) to access error exit

	bra.b		fout_sgl_ovfl_chkexc

# destination is a data register: its number is the low 3 bits of the opword
fout_sgl_ovfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

# The EXOP must be produced whenever the overflow can be taken as an
# exception, i.e. when OVFL (enable bit 4) or INEX2 (enable bit 1) is set.
# The previous mask of 0x0a tested UNFL (bit 3) instead of OVFL, so an
# enabled overflow with inexact disabled returned without building fp1.
fout_sgl_ovfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
	rts
7492 
7493 #
7494 # move out MAY overflow:
7495 # (1) force the exp to 0x3fff
7496 # (2) do a move w/ appropriate rnd mode
7497 # (3) if exp still equals zero, then insert original exponent
7498 #	for the correct result.
7499 #     if exp now equals one, then it overflowed so call ovf_res.
7500 #
#
# fout_sgl_may_ovfl: the exponent equals SGL_HI, so rounding decides
# whether the store overflows.
#   in : a0 = ptr to extended precision source operand (left untouched)
#        L_SCR3(a6) = sgl prec + rnd mode saved by fout_sgl
# A copy of the operand with its exponent forced to 0x3fff is rounded by
# loading it into fp0 under the saved prec/mode. If the magnitude of the
# rounded value is still below 2 the normal store path is taken; otherwise
# the overflow path is taken.
#
fout_sgl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_sgl_exg		# no; go finish NORM
	bra.w		fout_sgl_ovfl		# yes; go handle overflow
7518 
7519 ################
7520 
#
# fout_sd_exc_unfl / fout_sd_exc_ovfl: build the EXOP for a single or
# double precision move out and return it in fp1.
#   in : (sp) = source operand pointer pushed by the ovfl/unfl paths
#        L_SCR3(a6) = rnd prec,mode saved by fout_sgl/fout_dbl
# Both entries pop the source pointer and copy the operand to FP_SCR0.
# The underflow entry also normalizes a DENORM source and replaces its
# exponent with the negated shift amount returned by norm(). The copy is
# then rounded by _round() and its sign restored.
#
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0		# restore a0 (saved src ptr)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize; d0 = shift amount
	neg.l		%d0			# new exp = -(shift amount)
	andi.w		&0x7fff,%d0		# keep 15 exponent bits
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert below the sign bit
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

# the sign is moved out of the exponent word into the byte at
# 2+FP_SCR0_EX for the duration of the _round() call.
fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

# build d1 for _round(): hi word = rnd prec (L_SCR3 bits 7:6, moved down
# to bits 3:2), lo word = rnd mode (L_SCR3 bits 5:4, moved down to 1:0).
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1		# keep rnd prec
	swap		%d1			# prec to hi word
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1		# keep rnd mode
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
7568 
7569 #################################################################
7570 # fmove.d out ###################################################
7571 #################################################################
#
# fout_dbl: emulate a double precision fmove out.
#   in : a0 = ptr to extended precision source operand
#        d0 = rnd prec,mode; only the mode bits (0x30) are kept and
#             double precision is forced in their place
# The combined prec/mode is saved in L_SCR3(a6) for the overflow,
# underflow and EXOP paths. The biased exponent is compared with DBL_HI
# and DBL_LO to choose between the definite-overflow, possible-overflow,
# underflow and in-range paths. The 8-byte result is staged at L_SCR1(a6)
# and written to the address in EXC_EA(a6).
#
fout_dbl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&DBL_HI		# will operand overflow?
	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&DBL_LO		# will operand underflow?
	blt.w		fout_dbl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
# (also reached from fout_dbl_may_ovfl when rounding did not overflow;
# a0 must still point at the source operand.)
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR

	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes; branch (not call) to access error exit

	rts					# no; so we're finished
7618 
7619 #
7620 # here, we know that the operand would UNFL if moved out to double prec,
7621 # so, denorm and round and then use generic store double routine to
7622 # write the value to memory.
7623 #
#
# fout_dbl_unfl: the exponent is below DBL_LO, so the store underflows.
#   in : a0 = ptr to extended precision source operand
#        L_SCR3(a6) = dbl prec + rnd mode saved by fout_dbl
# Sets the UNFL exception bit, copies the operand to FP_SCR0 (normalizing
# it first when the source tag is DENORM), has unf_res() produce the
# default underflow result in place, converts it with dst_dbl() and writes
# the 8 bytes staged at L_SCR1/L_SCR2 to memory. The source pointer is
# pushed so that fout_sd_exc_unfl can pop it when an EXOP is required;
# otherwise it is discarded before returning.
#
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr for fout_sd_exc_unfl

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_dbl			# convert to double prec
	mov.l		%d0,L_SCR1(%a6)		# stage hi longword of the double
	mov.l		%d1,L_SCR2(%a6)		# stage lo longword of the double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes; branch (not call) to access error exit

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
	rts
7663 
7664 #
7665 # it's definitely an overflow so call ovf_res to get the correct answer
7666 #
#
# fout_dbl_ovfl: the operand is known to overflow double precision.
#   in : a0 = ptr to extended precision source operand
#        L_SCR3(a6) = dbl prec + rnd mode saved by fout_dbl
# Sets the overflow status bits in USER_FPSR (plus inex2 when any of the
# low 11 mantissa bits, which do not fit in a double, are set), stores
# the default overflow result selected by ovf_res() to memory and, when
# OVFL or INEX2 is enabled in the FPCR, continues at fout_sd_exc_ovfl to
# build the EXOP in fp1. The source pointer is pushed so that
# fout_sd_exc_ovfl can pop it; it is discarded here when no EXOP is needed.
#
fout_dbl_ovfl:
	mov.w		2+SRC_LO(%a0),%d0	# low word of lo mantissa
	andi.w		&0x7ff,%d0		# bits below dbl precision
	bne.b		fout_dbl_ovfl_inex2	# nonzero; result is inexact

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr for fout_sd_exc_ovfl

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6)	# store to double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes; branch (not call) to access error exit

# The EXOP must be produced whenever the overflow can be taken as an
# exception, i.e. when OVFL (enable bit 4) or INEX2 (enable bit 1) is set.
# The previous mask of 0x0a tested UNFL (bit 3) instead of OVFL, so an
# enabled overflow with inexact disabled returned without building fp1.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp		# no EXOP needed; drop saved src ptr
	rts
7703 
7704 #
7705 # move out MAY overflow:
7706 # (1) force the exp to 0x3fff
7707 # (2) do a move w/ appropriate rnd mode
7708 # (3) if exp still equals zero, then insert original exponent
7709 #	for the correct result.
7710 #     if exp now equals one, then it overflowed so call ovf_res.
7711 #
#
# fout_dbl_may_ovfl: the exponent equals DBL_HI, so rounding decides
# whether the store overflows.
#   in : a0 = ptr to extended precision source operand (left untouched)
#        L_SCR3(a6) = dbl prec + rnd mode saved by fout_dbl
# A copy of the operand with its exponent forced to 0x3fff is rounded by
# loading it into fp0 under the saved prec/mode. If the magnitude of the
# rounded value is still below 2 the normal store path is taken; otherwise
# the overflow path is taken.
#
fout_dbl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_dbl_exg		# no; go finish NORM
	bra.w		fout_dbl_ovfl		# yes; go handle overflow
7729 
7730 #########################################################################
7731 # XDEF ****************************************************************	#
7732 #	dst_dbl(): create double precision value from extended prec.	#
7733 #									#
7734 # XREF ****************************************************************	#
7735 #	None								#
7736 #									#
7737 # INPUT ***************************************************************	#
7738 #	a0 = pointer to source operand in extended precision		#
7739 #									#
7740 # OUTPUT **************************************************************	#
7741 #	d0 = hi(double precision result)				#
7742 #	d1 = lo(double precision result)				#
7743 #									#
7744 # ALGORITHM ***********************************************************	#
7745 #									#
7746 #  Changes extended precision to double precision.			#
7747 #  Note: no attempt is made to round the extended value to double.	#
7748 #	dbl_sign = ext_sign						#
7749 #	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
7750 #	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:11}					#
7752 #									#
7753 #		---------------   ---------------    ---------------	#
7754 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7755 #		---------------   ---------------    ---------------	#
7756 #		 95	    64    63 62	      32      31     11	  0	#
7757 #				     |			     |		#
7758 #				     |			     |		#
7759 #				     |			     |		#
7760 #			             v			     v		#
7761 #			      ---------------   ---------------		#
7762 #  double   ->		      |s|exp| mant  |   |  mant       |		#
7763 #			      ---------------   ---------------		#
7764 #			      63     51   32   31	       0	#
7765 #									#
7766 #########################################################################
7767 
#
# dst_dbl: repack the extended precision value at (a0) as a double.
#   in : a0 = ptr to extended precision operand
#   out: d0 = hi longword, d1 = lo longword of the double
# No rounding is done; mantissa bits that do not fit are dropped.
# L_SCR1(a6) and L_SCR2(a6) are used as scratch: on return L_SCR1 holds
# the hi longword and L_SCR2 holds only the part of the lo longword that
# came from the ms mantissa.
#
dst_dbl:
	clr.l		%d0			# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0		# add double precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm? (integer bit clear)
	bmi.b		dst_get_dupper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# d0 now in upper word
	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_dman		# if positive, go process mantissa
	bset		&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms (skips the integer bit)
	or.l		%d1,%d0			# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0			# load shift count
	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0		# get upper 21 bits of ls mantissa
	mov.l		L_SCR2(%a6),%d1		# reload the 11 bits taken from ms mantissa
	or.l		%d0,%d1			# put them in double result
	mov.l		L_SCR1(%a6),%d0		# return hi longword in d0
	rts
7797 
7798 #########################################################################
7799 # XDEF ****************************************************************	#
7800 #	dst_sgl(): create single precision value from extended prec	#
7801 #									#
7802 # XREF ****************************************************************	#
7803 #									#
7804 # INPUT ***************************************************************	#
7805 #	a0 = pointer to source operand in extended precision		#
7806 #									#
7807 # OUTPUT **************************************************************	#
7808 #	d0 = single precision result					#
7809 #									#
7810 # ALGORITHM ***********************************************************	#
7811 #									#
7812 # Changes extended precision to single precision.			#
7813 #	sgl_sign = ext_sign						#
7814 #	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
7815 #	get rid of ext integer bit					#
#	sgl_mant = ext_mant{62:40}					#
7817 #									#
7818 #		---------------   ---------------    ---------------	#
7819 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7820 #		---------------   ---------------    ---------------	#
7821 #		 95	    64    63 62	   40 32      31     12	  0	#
7822 #				     |	   |				#
7823 #				     |	   |				#
7824 #				     |	   |				#
7825 #			             v     v				#
7826 #			      ---------------				#
7827 #  single   ->		      |s|exp| mant  |				#
7828 #			      ---------------				#
7829 #			      31     22     0				#
7830 #									#
7831 #########################################################################
7832 
#
# dst_sgl: repack the extended precision value at (a0) as a single.
#   in : a0 = ptr to extended precision operand
#   out: d0 = single precision bit pattern
#        d1 = the 23 fraction bits, right justified
# No rounding is done; mantissa bits below the 23 kept are dropped.
#
dst_sgl:
	movq.l		&0x0,%d0		# start with a clean longword
	mov.w		FTEMP_EX(%a0),%d0	# d0.w = sign and extended exponent
	addi.w		&SGL_BIAS,%d0		# apply single precision bias ...
	subi.w		&EXT_BIAS,%d0		# ... and remove the extended one
	btst		&0x7,FTEMP_HI(%a0)	# integer bit set?
	bne.b		dst_get_supper		# yes; not a denorm
	subq.w		&0x1,%d0		# denorm: bias is SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# exponent word to the top half
	lsl.l		&0x7,%d0		# line exponent up with bits 30:23
	btst		&0x7,FTEMP_EX(%a0)	# source negative?
	beq.b		dst_get_sman		# no; leave sign clear
	ori.l		&0x80000000,%d0		# yes; set the sign bit
dst_get_sman:
	bfextu		FTEMP_HI(%a0){&1:&23},%d1 # 23 bits following the integer bit
	or.l		%d1,%d0			# merge fraction into the result
	rts
7853 
7854 ##############################################################################
# fout_pack(): store the source operand to memory as a 12-byte packed result.
#   - the destination <ea> comes from _calc_ea_fout() and is kept on the stack
#     until the write.
#   - NORM and DENORM inputs are converted by bindec(); the k-factor is taken
#     either from the static field of the command word or from a data
#     register (dynamic), selected by bit 4 of EXC_CMDREG.
#   - all other input types are handled at fout_pack_not_norm, which re-enters
#     at fout_pack_write with a0 already pointing at the data to store.
#   - the write goes through _dmem_write(), or _mem_write2() when SPCOND_FLG
#     equals mda7_flg; a non-zero d1 afterwards branches to fout_ext_err.
7855 fout_pack:
7856 	bsr.l		_calc_ea_fout		# fetch the <ea>
7857 	mov.l		%a0,-(%sp)		# save <ea>; popped as dst addr at fout_pack_write
7858 
7859 	mov.b		STAG(%a6),%d0		# fetch input type
7860 	bne.w		fout_pack_not_norm	# input is not NORM
7861 
7862 fout_pack_norm:
7863 	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
7864 	beq.b		fout_pack_s		# static
7865 
7866 fout_pack_d:
7867 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
7868 	lsr.b		&0x4,%d1		# move reg field to the low bits
7869 	andi.w		&0x7,%d1		# keep 3-bit register number
7870 
7871 	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
7872 
7873 	bra.b		fout_pack_type
7874 fout_pack_s:
7875 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
7876 
7877 fout_pack_type:
7878 	bfexts		%d0{&25:&7},%d0		# extract k-factor (low 7 bits, sign-extended)
7879 	mov.l	%d0,-(%sp)			# save k-factor across bindec()
7880 
7881 	lea		FP_SRC(%a6),%a0		# pass: ptr to input
7882 
7883 # bindec is currently scrambling FP_SRC for denorm inputs.
7884 # we'll have to change this, but for now, tough luck!!!
7885 	bsr.l		bindec			# convert xprec to packed
7886 
7887 #	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
7888 	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
7889 
7890 	mov.l	(%sp)+,%d0			# restore k-factor
7891 
7892 	tst.b		3+FP_SCR0_EX(%a6)	# any non-zero bits in 4th byte of result?
7893 	bne.b		fout_pack_set		# yes; result is not zero
7894 	tst.l		FP_SCR0_HI(%a6)		# any non-zero bits in 2nd longword?
7895 	bne.b		fout_pack_set		# yes; result is not zero
7896 	tst.l		FP_SCR0_LO(%a6)		# any non-zero bits in 3rd longword?
7897 	bne.b		fout_pack_set		# yes; result is not zero
7898 
7899 # add the extra condition that only if the k-factor was zero, too, should
7900 # we zero the exponent
7901 	tst.l		%d0			# was the k-factor zero?
7902 	bne.b		fout_pack_set		# no; leave the exponent alone
7903 # "mantissa" is all zero which means that the answer is zero. but, the '040
7904 # algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
7905 # if the mantissa is zero, I will zero the exponent, too.
7906 # the question now is whether the exponents sign bit is allowed to be non-zero
7907 # for a zero, also...
7908 	andi.w		&0xf000,FP_SCR0(%a6)	# clear low 12 bits of first word; keep top nibble
7909 
7910 fout_pack_set:
7911 
7912 	lea		FP_SCR0(%a6),%a0	# pass: src addr
7913 
7914 fout_pack_write:
7915 	mov.l		(%sp)+,%a1		# pass: dst addr
7916 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
7917 
7918 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # special-condition flag == mda7_flg?
7919 	beq.b		fout_pack_a7		# yes; use _mem_write2()
7920 
7921 	bsr.l		_dmem_write		# write the 12-byte operand to memory
7922 
7923 	tst.l		%d1			# did dstore fail?
7924 	bne.w		fout_ext_err		# yes
7925 
7926 	rts
7927 
7928 # we don't want to do the write if the exception occurred in supervisor mode
7929 # so _mem_write2() handles this for us.
7930 fout_pack_a7:
7931 	bsr.l		_mem_write2		# write the 12-byte operand to memory
7932 
7933 	tst.l		%d1			# did dstore fail?
7934 	bne.w		fout_ext_err		# yes
7935 
7936 	rts
7937 
# fout_pack_not_norm: input type (in d0) is not NORM.
#   - DENORM is converted exactly like a NORM (back to fout_pack_norm).
#   - every other type is written out directly from FP_SRC after the word at
#     2+FP_SRC_EX has been cleared; an SNAN additionally sets SNAN/AIOP in
#     the FPSR exception byte and sets bit 6 of the first mantissa byte.
#   Both paths join fout_pack_write with a0 = ptr to FP_SRC.
7938 fout_pack_not_norm:
7939 	cmpi.b		%d0,&DENORM		# is it a DENORM?
7940 	beq.w		fout_pack_norm		# yes
7941 	lea		FP_SRC(%a6),%a0		# pass: src addr is the operand itself
7942 	clr.w		2+FP_SRC_EX(%a6)	# zero the word following {sgn,exp}
7943 	cmpi.b		%d0,&SNAN		# is it an SNAN?
7944 	beq.b		fout_pack_snan		# yes
7945 	bra.b		fout_pack_write		# no
7946 
7947 fout_pack_snan:
7948 	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7949 	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
7950 	bra.b		fout_pack_write
7951 
7952 #########################################################################
7953 # XDEF ****************************************************************	#
7954 #	fmul(): emulates the fmul instruction				#
7955 #	fsmul(): emulates the fsmul instruction				#
7956 #	fdmul(): emulates the fdmul instruction				#
7957 #									#
7958 # XREF ****************************************************************	#
7959 #	scale_to_zero_src() - scale src exponent to zero		#
7960 #	scale_to_zero_dst() - scale dst exponent to zero		#
7961 #	unf_res() - return default underflow result			#
7962 #	ovf_res() - return default overflow result			#
7963 #	res_qnan() - return QNAN result					#
7964 #	res_snan() - return SNAN result					#
7965 #									#
7966 # INPUT ***************************************************************	#
7967 #	a0 = pointer to extended precision source operand		#
7968 #	a1 = pointer to extended precision destination operand		#
7969 #	d0 = rnd prec,mode						#
7970 #									#
7971 # OUTPUT **************************************************************	#
7972 #	fp0 = result							#
7973 #	fp1 = EXOP (if exception occurred)				#
7974 #									#
7975 # ALGORITHM ***********************************************************	#
7976 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
7977 # norms/denorms into ext/sgl/dbl precision.				#
7978 #	For norms/denorms, scale the exponents such that a multiply	#
7979 # instruction won't cause an exception. Use the regular fmul to		#
7980 # compute a result. Check if the regular operands would have taken	#
7981 # an exception. If so, return the default overflow/underflow result	#
7982 # and return the EXOP if exceptions are enabled. Else, scale the	#
7983 # result operand to the proper exponent.				#
7984 #									#
7985 #########################################################################
7986 
# Scale-factor thresholds used by fmul_norm. Each table holds one longword
# per rounding precision and is indexed with (precision * 4); the entries
# are in the order ext, sgl, dbl. fmul_norm compares the combined scale
# factor against the selected entry to classify the multiply as
# overflow / may-overflow / underflow / may-underflow / normal.
7987 	align		0x10
7988 tbl_fmul_ovfl:
7989 	long		0x3fff - 0x7ffe		# ext_max
7990 	long		0x3fff - 0x407e		# sgl_max
7991 	long		0x3fff - 0x43fe		# dbl_max
7992 tbl_fmul_unfl:
7993 	long		0x3fff + 0x0001		# ext_unfl
7994 	long		0x3fff - 0x3f80		# sgl_unfl
7995 	long		0x3fff - 0x3c00		# dbl_unfl
7996 
# Entry points for the multiply emulation.
#   fsmul/fdmul force the rounding precision in d0 to single/double (the
#   rounding mode in bits 5:4 is preserved) and then continue as fmul.
#   fmul saves the rounding info in L_SCR3 and dispatches on the operand
#   tags: (DTAG << 3) | STAG == 0 means both operands are NORMs and execution
#   falls through into fmul_norm; anything else goes to fmul_not_norm with
#   the combined tag index in d1.
7997 	global		fsmul
7998 fsmul:
7999 	andi.b		&0x30,%d0		# clear rnd prec; keep rnd mode
8000 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8001 	bra.b		fmul
8002 
8003 	global		fdmul
8004 fdmul:
8005 	andi.b		&0x30,%d0		# clear rnd prec; keep rnd mode
8006 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
8007 						# fall through into fmul
8008 	global		fmul
8009 fmul:
8010 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8011 
8012 	clr.w		%d1
8013 	mov.b		DTAG(%a6),%d1		# fetch dst optype tag
8014 	lsl.b		&0x3,%d1		# dst tag selects the table row (8 entries each)
8015 	or.b		STAG(%a6),%d1		# combine src tags
8016 	bne.w		fmul_not_norm		# optimize on non-norm input
8017 
# fmul_norm: both operands are NORMs or DENORMs.
#   Copy dst into FP_SCR1 and src into FP_SCR0, scale both exponents with
#   scale_to_zero_src()/scale_to_zero_dst(), and add the two returned scale
#   factors. The sum (in d0) is compared against the per-precision
#   thresholds in tbl_fmul_ovfl/tbl_fmul_unfl to pick the handler; when no
#   branch is taken, execution falls through into fmul_normal.
#   d0 (the scale factor) stays live for every handler that follows.
8018 fmul_norm:
8019 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
8020 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8021 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8022 
8023 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8024 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8025 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8026 
8027 	bsr.l		scale_to_zero_src	# scale src exponent
8028 	mov.l		%d0,-(%sp)		# save scale factor 1
8029 
8030 	bsr.l		scale_to_zero_dst	# scale dst exponent
8031 
8032 	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
8033 
8034 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8035 	lsr.b		&0x6,%d1		# shift to lo bits
8036 	mov.l		(%sp)+,%d0		# load S.F.
8037 	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8038 	beq.w		fmul_may_ovfl		# result may rnd to overflow
8039 	blt.w		fmul_ovfl		# result will overflow
8040 
8041 	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8042 	beq.w		fmul_may_unfl		# result may rnd to no unfl
8043 	bgt.w		fmul_unfl		# result will underflow
8044 
8045 #
8046 # NORMAL:
8047 # - the result of the multiply operation will neither overflow nor underflow.
8048 # - do the multiply to the proper precision and rounding mode.
8049 # - scale the result exponent using the scale factor. if both operands were
8050 # normalized then we really don't need to go through this scaling. but for now,
8051 # this will do.
8052 #
# in:  FP_SCR1 = scaled dst, FP_SCR0 = scaled src, d0 = scale factor,
#      L_SCR3 = rnd prec,mode
# out: fp0 = result with the scale factor removed from its exponent;
#      the FPSR bits produced by the multiply are or'ed into USER_FPSR.
# fmul_normal_exit is also entered from fmul_may_ovfl and fmul_may_unfl with
# the scaled product already in fp0.
8053 fmul_normal:
8054 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8055 
8056 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8057 	fmov.l		&0x0,%fpsr		# clear FPSR
8058 
8059 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8060 
8061 	fmov.l		%fpsr,%d1		# save status
8062 	fmov.l		&0x0,%fpcr		# clear FPCR
8063 
8064 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8065 
8066 fmul_normal_exit:
8067 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8068 	mov.l		%d2,-(%sp)		# save d2
8069 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8070 	mov.l		%d1,%d2			# make a copy
8071 	andi.l		&0x7fff,%d1		# strip sign
8072 	andi.w		&0x8000,%d2		# keep old sign
8073 	sub.l		%d0,%d1			# exp - scale factor = unscaled exponent
8074 	or.w		%d2,%d1			# concat old sign,new exp
8075 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8076 	mov.l		(%sp)+,%d2		# restore d2
8077 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8078 	rts
8079 
8080 #
8081 # OVERFLOW:
8082 # - the result of the multiply operation is an overflow.
8083 # - do the multiply to the proper precision and rounding mode in order to
8084 # set the inexact bits.
8085 # - calculate the default result and return it in fp0.
8086 # - if overflow or inexact is enabled, we need a multiply result rounded to
8087 # extended precision. if the original operation was extended, then we have this
8088 # result. if the original operation was single or double, we have to do another
8089 # multiply using extended precision and the correct rounding mode. the result
8090 # of this operation then has its exponent scaled by -0x6000 to create the
8091 # exceptional operand.
8092 #
# in:  FP_SCR1 = scaled dst, FP_SCR0 = scaled src, d0 = scale factor,
#      L_SCR3 = rnd prec,mode
# out: fp0 = default overflow result from ovf_res();
#      fp1 = EXOP when OVFL or INEX is enabled in FPCR_ENABLE.
8093 fmul_ovfl:
8094 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8095 
8096 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8097 	fmov.l		&0x0,%fpsr		# clear FPSR
8098 
8099 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8100 
8101 	fmov.l		%fpsr,%d1		# save status
8102 	fmov.l		&0x0,%fpcr		# clear FPCR
8103 
8104 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8105 
8106 # save setting this until now because this is where fmul_may_ovfl may jump in
8107 fmul_ovfl_tst:
8108 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8109 
8110 	mov.b		FPCR_ENABLE(%a6),%d1
8111 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8112 	bne.b		fmul_ovfl_ena		# yes
8113 
8114 # calculate the default result
8115 fmul_ovfl_dis:
8116 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8117 	sne		%d1			# set sign param accordingly
8118 	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
8119 	bsr.l		ovf_res			# calculate default result
8120 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8121 	fmovm.x		(%a0),&0x80		# return default result in fp0
8122 	rts
8123 
8124 #
8125 # OVFL is enabled; Create EXOP:
8126 # - if precision is extended, then we have the EXOP. simply bias the exponent
8127 # with an extra -0x6000. if the precision is single or double, we need to
8128 # calculate a result rounded to extended precision.
8129 #
8130 fmul_ovfl_ena:
8131 	mov.l		L_SCR3(%a6),%d1
8132 	andi.b		&0xc0,%d1		# test the rnd prec
8133 	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
8134 
8135 fmul_ovfl_ena_cont:
8136 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8137 
8138 	mov.l		%d2,-(%sp)		# save d2
8139 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8140 	mov.w		%d1,%d2			# make a copy
8141 	andi.l		&0x7fff,%d1		# strip sign
8142 	sub.l		%d0,%d1			# exp - scale factor = unscaled exponent
8143 	subi.l		&0x6000,%d1		# subtract bias
8144 	andi.w		&0x7fff,%d1		# clear sign bit
8145 	andi.w		&0x8000,%d2		# keep old sign
8146 	or.w		%d2,%d1			# concat old sign,new exp
8147 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8148 	mov.l		(%sp)+,%d2		# restore d2
8149 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8150 	bra.b		fmul_ovfl_dis		# then build the default result in fp0
8151 
# sgl/dbl precision: redo the multiply with the rounding mode only (the
# precision bits are masked out of the FPCR value) to get the
# extended-precision product used for the EXOP. the FPSR is not saved
# after this second multiply.
8152 fmul_ovfl_ena_sd:
8153 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8154 
8155 	mov.l		L_SCR3(%a6),%d1
8156 	andi.b		&0x30,%d1		# keep rnd mode only
8157 	fmov.l		%d1,%fpcr		# set FPCR
8158 
8159 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8160 
8161 	fmov.l		&0x0,%fpcr		# clear FPCR
8162 	bra.b		fmul_ovfl_ena_cont
8163 
8164 #
8165 # may OVERFLOW:
8166 # - the result of the multiply operation MAY overflow.
8167 # - do the multiply to the proper precision and rounding mode in order to
8168 # set the inexact bits.
8169 # - calculate the default result and return it in fp0.
8170 #
# the scale factor equals the overflow threshold exactly, so the outcome is
# decided by the magnitude of the scaled product: |result| >= 2 is treated
# as overflow (join fmul_ovfl_tst), otherwise the product is unscaled and
# returned through fmul_normal_exit. fp1 is used as scratch for the compare.
8171 fmul_may_ovfl:
8172 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8173 
8174 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8175 	fmov.l		&0x0,%fpsr		# clear FPSR
8176 
8177 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8178 
8179 	fmov.l		%fpsr,%d1		# save status
8180 	fmov.l		&0x0,%fpcr		# clear FPCR
8181 
8182 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8183 
8184 	fabs.x		%fp0,%fp1		# make a copy of result
8185 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8186 	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
8187 
8188 # no, it didn't overflow; we have correct result
8189 	bra.w		fmul_normal_exit
8190 
8191 #
8192 # UNDERFLOW:
8193 # - the result of the multiply operation is an underflow.
8194 # - do the multiply to the proper precision and rounding mode in order to
8195 # set the inexact bits.
8196 # - calculate the default result and return it in fp0.
8197 # - if underflow or inexact is enabled, we need a multiply result rounded to
8198 # extended precision. if the original operation was extended, then we have this
8199 # result. if the original operation was single or double, we have to do another
8200 # multiply using extended precision and the correct rounding mode. the result
8201 # of this operation then has its exponent scaled by +0x6000 to create the
8202 # exceptional operand.
8203 #
# in:  FP_SCR1 = scaled dst, FP_SCR0 = scaled src, d0 = scale factor,
#      L_SCR3 = rnd prec,mode
# out: fp0 = default underflow result from unf_res();
#      fp1 = EXOP when UNFL or INEX is enabled in FPCR_ENABLE.
8204 fmul_unfl:
8205 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8206 
8207 # for fun, let's use only extended precision, round to zero. then, let
8208 # the unf_res() routine figure out all the rest.
8209 # will we get the correct answer.
8210 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8211 
8212 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
8213 	fmov.l		&0x0,%fpsr		# clear FPSR
8214 
8215 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8216 
8217 	fmov.l		%fpsr,%d1		# save status
8218 	fmov.l		&0x0,%fpcr		# clear FPCR
8219 
8220 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8221 
8222 	mov.b		FPCR_ENABLE(%a6),%d1
8223 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8224 	bne.b		fmul_unfl_ena		# yes
8225 
8226 fmul_unfl_dis:
8227 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8228 
8229 	lea		FP_SCR0(%a6),%a0	# pass: result addr
8230 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8231 	bsr.l		unf_res			# calculate default result
8232 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8233 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8234 	rts
8235 
8236 #
8237 # UNFL is enabled.
8238 #
# build the EXOP in fp1: redo the multiply in fp1 using the user's rounding
# mode (and extended precision), remove the scale factor from the exponent,
# add the 0x6000 bias, and then join fmul_unfl_dis for the default result.
# fp0 still holds the round-to-zero product; it is spilled to FP_SCR0 at
# fmul_unfl_dis only after the EXOP has been reloaded from FP_SCR0 into fp1.
8239 fmul_unfl_ena:
8240 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
8241 
8242 	mov.l		L_SCR3(%a6),%d1
8243 	andi.b		&0xc0,%d1		# is precision extended?
8244 	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
8245 
8246 # if the rnd mode is anything but RZ, then we have to re-do the above
8247 # multiplication because we used RZ for all.
8248 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8249 
8250 fmul_unfl_ena_cont:
8251 	fmov.l		&0x0,%fpsr		# clear FPSR
8252 
8253 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8254 
8255 	fmov.l		&0x0,%fpcr		# clear FPCR
8256 
8257 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
8258 	mov.l		%d2,-(%sp)		# save d2
8259 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8260 	mov.l		%d1,%d2			# make a copy
8261 	andi.l		&0x7fff,%d1		# strip sign
8262 	andi.w		&0x8000,%d2		# keep old sign
8263 	sub.l		%d0,%d1			# exp - scale factor = unscaled exponent
8264 	addi.l		&0x6000,%d1		# add bias
8265 	andi.w		&0x7fff,%d1		# clear sign position
8266 	or.w		%d2,%d1			# concat old sign,new exp
8267 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8268 	mov.l		(%sp)+,%d2		# restore d2
8269 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8270 	bra.w		fmul_unfl_dis
8271 
# sgl/dbl precision: the EXOP multiply uses the rounding mode only (the
# precision bits are masked out of the FPCR value).
8272 fmul_unfl_ena_sd:
8273 	mov.l		L_SCR3(%a6),%d1
8274 	andi.b		&0x30,%d1		# use only rnd mode
8275 	fmov.l		%d1,%fpcr		# set FPCR
8276 
8277 	bra.b		fmul_unfl_ena_cont
8278 
8279 # MAY UNDERFLOW:
8280 # -use the correct rounding mode and precision. this code favors operations
8281 # that do not underflow.
# the scale factor equals the underflow threshold exactly, so the outcome is
# decided by the magnitude of the scaled product compared with 2:
#   |result| > 2  -> no underflow; unscale through fmul_normal_exit
#   |result| < 2  -> underflow; handle at fmul_unfl
#   otherwise     -> redo the multiply in fp1 with round-to-zero to decide
# fp1 is used as scratch for the compares.
8282 fmul_may_unfl:
8283 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8284 
8285 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8286 	fmov.l		&0x0,%fpsr		# clear FPSR
8287 
8288 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8289 
8290 	fmov.l		%fpsr,%d1		# save status
8291 	fmov.l		&0x0,%fpcr		# clear FPCR
8292 
8293 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8294 
8295 	fabs.x		%fp0,%fp1		# make a copy of result
8296 	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
8297 	fbgt.w		fmul_normal_exit	# |result| > 2; no underflow occurred
8298 	fblt.w		fmul_unfl		# |result| < 2; underflow occurred
8299 
8300 #
8301 # we still don't know if underflow occurred. result is ~ equal to 2. but,
8302 # we don't know if the result was an underflow that rounded up to a 2 or
8303 # a normalized number that rounded down to a 2. so, redo the entire operation
8304 # using RZ as the rounding mode to see what the pre-rounded result is.
8305 # this case should be relatively rare.
8306 #
8307 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand into fp1
8308 
8309 	mov.l		L_SCR3(%a6),%d1
8310 	andi.b		&0xc0,%d1		# keep rnd prec
8311 	ori.b		&rz_mode*0x10,%d1	# insert RZ
8312 
8313 	fmov.l		%d1,%fpcr		# set FPCR
8314 	fmov.l		&0x0,%fpsr		# clear FPSR
8315 
8316 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8317 
8318 	fmov.l		&0x0,%fpcr		# clear FPCR
8319 	fabs.x		%fp1			# make absolute value
8320 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
8321 	fbge.w		fmul_normal_exit	# no; no underflow occurred
8322 	bra.w		fmul_unfl		# yes, underflow occurred
8323 
8324 ################################################################################
8325 
8326 #
8327 # Multiply: inputs are not both normalized; what are they?
8328 #
# in: d1 = (DTAG << 3) | STAG, as built in fmul.
# tbl_fmul_op holds 16-bit offsets relative to tbl_fmul_op itself: one row of
# 8 entries per destination tag, one column per source tag. the labels below
# read "DST x SRC". unused slots hold offset 0 (tbl_fmul_op itself).
8329 fmul_not_norm:
8330 	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
8331 	jmp		(tbl_fmul_op.b,%pc,%d1.w)	# jump to the handler
8332 
8333 	swbeg		&48
8334 tbl_fmul_op:
8335 	short		fmul_norm	- tbl_fmul_op # NORM x NORM
8336 	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
8337 	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
8338 	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
8339 	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
8340 	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
8341 	short		tbl_fmul_op	- tbl_fmul_op #
8342 	short		tbl_fmul_op	- tbl_fmul_op #
8343 
8344 	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
8345 	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
8346 	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
8347 	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
8348 	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
8349 	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
8350 	short		tbl_fmul_op	- tbl_fmul_op #
8351 	short		tbl_fmul_op	- tbl_fmul_op #
8352 
8353 	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
8354 	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
8355 	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
8356 	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
8357 	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
8358 	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
8359 	short		tbl_fmul_op	- tbl_fmul_op #
8360 	short		tbl_fmul_op	- tbl_fmul_op #
8361 
8362 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
8363 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
8364 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
8365 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
8366 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
8367 	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
8368 	short		tbl_fmul_op	- tbl_fmul_op #
8369 	short		tbl_fmul_op	- tbl_fmul_op #
8370 
8371 	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
8372 	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
8373 	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
8374 	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
8375 	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
8376 	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
8377 	short		tbl_fmul_op	- tbl_fmul_op #
8378 	short		tbl_fmul_op	- tbl_fmul_op #
8379 
8380 	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
8381 	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
8382 	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
8383 	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
8384 	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
8385 	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
8386 	short		tbl_fmul_op	- tbl_fmul_op #
8387 	short		tbl_fmul_op	- tbl_fmul_op #
8388 
# local stubs: tbl_fmul_op stores 16-bit offsets, so these labels sit next to
# the table and forward with long branches to the shared result routines.
8389 fmul_res_operr:
8390 	bra.l		res_operr		# forward to res_operr()
8391 fmul_res_snan:
8392 	bra.l		res_snan		# forward to res_snan()
8393 fmul_res_qnan:
8394 	bra.l		res_qnan		# forward to res_qnan()
8395 
8396 #
8397 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8398 #
# in:  a0 = ptr to src operand, a1 = ptr to dst operand
# out: fp0 = +0 or -0; the sign is the exclusive-or of the operand signs.
#      FPSR_CC is overwritten with Z (and N for a negative zero).
# scratch: d0, d1
8399 	global		fmul_zero		# global for fsglmul
8400 fmul_zero:
8401 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8402 	mov.b		DST_EX(%a1),%d1
8403 	eor.b		%d0,%d1			# bit 7 of d1 = result sign
8404 	bpl.b		fmul_zero_p		# result ZERO is pos.
8405 fmul_zero_n:
8406 	fmov.s		&0x80000000,%fp0	# load -ZERO
8407 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8408 	rts
8409 fmul_zero_p:
8410 	fmov.s		&0x00000000,%fp0	# load +ZERO
8411 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
8412 	rts
8413 
8414 #
8415 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8416 #
8417 # Note: The j-bit for an infinity is a don't-care. However, to be
8418 # strictly compatible w/ the 68881/882, we make sure to return an
8419 # INF w/ the j-bit set if the input INF j-bit was set. Destination
8420 # INFs take priority.
8421 #
# in:  a0 = ptr to src operand, a1 = ptr to dst operand
# out: fp0 = the INF operand (dst for fmul_inf_dst, src for fmul_inf_src)
#      with its sign replaced by the exclusive-or of the operand signs.
#      FPSR_CC is overwritten with INF (and N for a negative result).
# scratch: d0, d1
8422 	global		fmul_inf_dst		# global for fsglmul
8423 fmul_inf_dst:
8424 	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
8425 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8426 	mov.b		DST_EX(%a1),%d1
8427 	eor.b		%d0,%d1			# bit 7 of d1 = result sign
8428 	bpl.b		fmul_inf_dst_p		# result INF is pos.
8429 fmul_inf_dst_n:
8430 	fabs.x		%fp0			# clear result sign
8431 	fneg.x		%fp0			# set result sign
8432 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8433 	rts
8434 fmul_inf_dst_p:
8435 	fabs.x		%fp0			# clear result sign
8436 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
8437 	rts
8438 
# same as fmul_inf_dst, but the source INF is the one returned; the sign
# fix-up is shared with fmul_inf_dst_p / fmul_inf_dst_n.
8439 	global		fmul_inf_src		# global for fsglmul
8440 fmul_inf_src:
8441 	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
8442 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8443 	mov.b		DST_EX(%a1),%d1
8444 	eor.b		%d0,%d1			# bit 7 of d1 = result sign
8445 	bpl.b		fmul_inf_dst_p		# result INF is pos.
8446 	bra.b		fmul_inf_dst_n		# result INF is neg.
8447 
8448 #########################################################################
8449 # XDEF ****************************************************************	#
8450 #	fin(): emulates the fmove instruction				#
8451 #	fsin(): emulates the fsmove instruction				#
8452 #	fdin(): emulates the fdmove instruction				#
8453 #									#
8454 # XREF ****************************************************************	#
8455 #	norm() - normalize mantissa for EXOP on denorm			#
8456 #	scale_to_zero_src() - scale src exponent to zero		#
8457 #	ovf_res() - return default overflow result			#
8458 #	unf_res() - return default underflow result			#
8459 #	res_qnan_1op() - return QNAN result				#
8460 #	res_snan_1op() - return SNAN result				#
8461 #									#
8462 # INPUT ***************************************************************	#
8463 #	a0 = pointer to extended precision source operand		#
8464 #	d0 = round prec/mode						#
8465 #									#
8466 # OUTPUT **************************************************************	#
8467 #	fp0 = result							#
8468 #	fp1 = EXOP (if exception occurred)				#
8469 #									#
8470 # ALGORITHM ***********************************************************	#
8471 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
8472 # norms into extended, single, and double precision.			#
8473 #	Norms can be emulated w/ a regular fmove instruction. For	#
8474 # sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
8475 # if the result would have overflowed/underflowed. If so, use unf_res()	#
8476 # or ovf_res() to return the default result. Also return EXOP if	#
8477 # exception is enabled. If no exception, return the default result.	#
8478 #	Unnorms don't pass through here.				#
8479 #									#
8480 #########################################################################
8481 
# Entry points for the move-in emulation.
#   fsin/fdin force the rounding precision in d0 to single/double (the
#   rounding mode in bits 5:4 is preserved) and then continue as fin.
#   fin saves the rounding info in L_SCR3 and checks the source tag: a zero
#   tag (NORM) falls through into fin_norm, anything else goes to
#   fin_not_norm with the tag in d1.
8482 	global		fsin
8483 fsin:
8484 	andi.b		&0x30,%d0		# clear rnd prec; keep rnd mode
8485 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
8486 	bra.b		fin
8487 
8488 	global		fdin
8489 fdin:
8490 	andi.b		&0x30,%d0		# clear rnd prec; keep rnd mode
8491 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
8492 						# fall through into fin
8493 	global		fin
8494 fin:
8495 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8496 
8497 	mov.b		STAG(%a6),%d1		# fetch src optype tag
8498 	bne.w		fin_not_norm		# optimize on non-norm input
8499 
8500 #
8501 # FP MOVE IN: NORMs and DENORMs ONLY!
8502 #
# in:  a0 = ptr to src operand, d0 = rnd prec,mode (low byte)
# single/double precision goes to fin_not_ext with only the precision bits
# left in the low byte of d0. for extended precision the operand is loaded
# into fp0 unchanged and 'N' is set in FPSR_CC if it is negative.
8503 fin_norm:
8504 	andi.b		&0xc0,%d0		# is precision extended?
8505 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8506 
8507 #
8508 # precision selected is extended. so...we cannot get an underflow
8509 # or overflow because of rounding to the correct precision. so...
8510 # skip the scaling and unscaling...
8511 #
8512 	tst.b		SRC_EX(%a0)		# is the operand negative?
8513 	bpl.b		fin_norm_done		# no
8514 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8515 fin_norm_done:
8516 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8517 	rts
8518 
8519 #
8520 # for an extended precision DENORM, the UNFL exception bit is set
8521 # the accrued bit is NOT set in this instance(no inexactness!)
8522 #
# in:  a0 = ptr to src operand, d0 = rnd prec,mode (low byte)
# out: fp0 = the DENORM operand unchanged; 'N' set in FPSR_CC if negative;
#      fp1 = EXOP when UNFL is enabled in FPCR_ENABLE.
# single/double precision goes to fin_not_ext instead.
8523 fin_denorm:
8524 	andi.b		&0xc0,%d0		# is precision extended?
8525 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8526 
8527 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8528 	tst.b		SRC_EX(%a0)		# is the operand negative?
8529 	bpl.b		fin_denorm_done		# no
8530 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8531 fin_denorm_done:
8532 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8533 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8534 	bne.b		fin_denorm_unfl_ena	# yes
8535 	rts
8536 
8537 #
8538 # the input is an extended DENORM and underflow is enabled in the FPCR.
8539 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
8540 # exponent and insert back into the operand.
8541 #
8542 fin_denorm_unfl_ena:
8543 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8544 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8545 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8546 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
8547 	bsr.l		norm			# normalize result
8548 	neg.w		%d0			# new exponent = -(shft val)
8549 	addi.w		&0x6000,%d0		# add new bias to exponent
8550 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
8551 	andi.w		&0x8000,%d1		# keep old sign
8552 	andi.w		&0x7fff,%d0		# clear sign position
8553 	or.w		%d1,%d0			# concat new exp,old sign
8554 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
8555 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8556 	rts
8557 
8558 #
8559 # operand is to be rounded to single or double precision
8560 #
# in:  a0 = ptr to src operand, low byte of d0 = rnd prec bits only
8561 fin_not_ext:
8562 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
8563 	bne.b		fin_dbl
8564 
8565 #
8566 # operand is to be rounded to single precision
8567 #
# copy the operand to FP_SCR0, scale its exponent, and use the returned
# scale factor (d0) to classify the move as underflow, may-overflow,
# overflow, or normal (fall through).
8568 fin_sgl:
8569 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8570 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8571 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8572 	bsr.l		scale_to_zero_src	# calculate scale factor
8573 
8574 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
8575 	bge.w		fin_sd_unfl		# yes; go handle underflow
8576 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
8577 	beq.w		fin_sd_may_ovfl		# maybe; go check
8578 	blt.w		fin_sd_ovfl		# yes; go handle overflow
8579 
8580 #
8581 # operand will NOT overflow or underflow when moved into the fp reg file
8582 #
# round the scaled operand with the user's precision/mode, record the FPSR
# bits in USER_FPSR, then remove the scale factor from the exponent.
# fin_sd_normal_exit is also entered from fin_sd_may_ovfl with the rounded,
# scaled result already in fp0.
8583 fin_sd_normal:
8584 	fmov.l		&0x0,%fpsr		# clear FPSR
8585 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8586 
8587 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8588 
8589 	fmov.l		%fpsr,%d1		# save FPSR
8590 	fmov.l		&0x0,%fpcr		# clear FPCR
8591 
8592 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8593 
8594 fin_sd_normal_exit:
8595 	mov.l		%d2,-(%sp)		# save d2
8596 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8597 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8598 	mov.w		%d1,%d2			# make a copy
8599 	andi.l		&0x7fff,%d1		# strip sign
8600 	sub.l		%d0,%d1			# exp - scale factor = unscaled exponent
8601 	andi.w		&0x8000,%d2		# keep old sign
8602 	or.w		%d1,%d2			# concat old sign,new exponent
8603 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
8604 	mov.l		(%sp)+,%d2		# restore d2
8605 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8606 	rts
8607 
8608 #
8609 # operand is to be rounded to double precision
8610 #
# same classification as fin_sgl, using the double-precision thresholds;
# the handlers themselves are shared with the single-precision path.
8611 fin_dbl:
8612 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8613 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8614 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8615 	bsr.l		scale_to_zero_src	# calculate scale factor
8616 
8617 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
8618 	bge.w		fin_sd_unfl		# yes; go handle underflow
8619 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
8620 	beq.w		fin_sd_may_ovfl		# maybe; go check
8621 	blt.w		fin_sd_ovfl		# yes; go handle overflow
8622 	bra.w		fin_sd_normal		# no; go handle normalized op
8623 
8624 #
8625 # operand WILL underflow when moved in to the fp register file
8626 #
# in:  FP_SCR0 = scaled operand, d0 = scale factor, L_SCR3 = rnd prec,mode
# out: fp0 = default underflow result from unf_res();
#      fp1 = EXOP when UNFL or INEX is enabled in FPCR_ENABLE.
8627 fin_sd_unfl:
8628 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8629 
8630 	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
8631 	bpl.b		fin_sd_unfl_tst		# no
8632 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
8633 
8634 # if underflow or inexact is enabled, then go calculate the EXOP first.
8635 fin_sd_unfl_tst:
8636 	mov.b		FPCR_ENABLE(%a6),%d1
8637 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8638 	bne.b		fin_sd_unfl_ena		# yes
8639 
8640 fin_sd_unfl_dis:
8641 	lea		FP_SCR0(%a6),%a0	# pass: result addr
8642 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8643 	bsr.l		unf_res			# calculate default result
8644 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8645 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8646 	rts
8647 
8648 #
8649 # operand will underflow AND underflow or inexact is enabled.
8650 # Therefore, we must return the result rounded to extended precision.
8651 #
# the EXOP is built in FP_SCR1 from the scaled mantissa in FP_SCR0 with the
# scale factor removed from the exponent and the 0x6000 bias added; FP_SCR0
# itself is left untouched for fin_sd_unfl_dis.
8652 fin_sd_unfl_ena:
8653 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8654 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8655 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
8656 
8657 	mov.l		%d2,-(%sp)		# save d2
8658 	mov.w		%d1,%d2			# make a copy
8659 	andi.l		&0x7fff,%d1		# strip sign
8660 	sub.l		%d0,%d1			# subtract scale factor
8661 	andi.w		&0x8000,%d2		# extract old sign
8662 	addi.l		&0x6000,%d1		# add new bias
8663 	andi.w		&0x7fff,%d1		# clear sign position
8664 	or.w		%d1,%d2			# concat old sign,new exp
8665 	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
8666 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
8667 	mov.l		(%sp)+,%d2		# restore d2
8668 	bra.b		fin_sd_unfl_dis
8669 
8670 #
8671 # operand WILL overflow.
8672 #
# in:  FP_SCR0 = scaled operand, d0 = scale factor, L_SCR3 = rnd prec,mode
# out: fp0 = default overflow result from ovf_res();
#      fp1 = EXOP when OVFL or INEX is enabled in FPCR_ENABLE.
8673 fin_sd_ovfl:
8674 	fmov.l		&0x0,%fpsr		# clear FPSR
8675 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8676 
8677 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8678 
8679 	fmov.l		&0x0,%fpcr		# clear FPCR
8680 	fmov.l		%fpsr,%d1		# save FPSR
8681 
8682 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8683 
# fin_sd_may_ovfl joins here once it has determined that overflow occurred.
8684 fin_sd_ovfl_tst:
8685 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8686 
8687 	mov.b		FPCR_ENABLE(%a6),%d1
8688 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8689 	bne.b		fin_sd_ovfl_ena		# yes
8690 
8691 #
8692 # OVFL is not enabled; therefore, we must create the default result by
8693 # calling ovf_res().
8694 #
8695 fin_sd_ovfl_dis:
8696 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8697 	sne		%d1			# set sign param accordingly
8698 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
8699 	bsr.l		ovf_res			# calculate default result
8700 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8701 	fmovm.x		(%a0),&0x80		# return default result in fp0
8702 	rts
8703 
8704 #
8705 # OVFL is enabled.
8706 # the INEX2 bit has already been updated by the round to the correct precision.
8707 # now, round to extended(and don't alter the FPSR).
8708 #
# the EXOP is built in place in FP_SCR0 (the scaled source operand): the
# scale factor is removed from the exponent and 0x6000 is subtracted.
8709 fin_sd_ovfl_ena:
8710 	mov.l		%d2,-(%sp)		# save d2
8711 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8712 	mov.l		%d1,%d2			# make a copy
8713 	andi.l		&0x7fff,%d1		# strip sign
8714 	andi.w		&0x8000,%d2		# keep old sign
8715 	sub.l		%d0,%d1			# exp - scale factor = unscaled exponent
8716 	sub.l		&0x6000,%d1		# subtract bias
8717 	andi.w		&0x7fff,%d1		# clear sign position
8718 	or.w		%d2,%d1			# concat old sign,new exp
8719 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8720 	mov.l		(%sp)+,%d2		# restore d2
8721 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8722 	bra.b		fin_sd_ovfl_dis
8723 
8724 #
8725 # the move in MAY overflow. so...
8726 #
# the scale factor equals the overflow threshold exactly, so the outcome is
# decided by the magnitude of the rounded, scaled operand: |result| >= 2 is
# treated as overflow (join fin_sd_ovfl_tst), otherwise the result is
# unscaled and returned through fin_sd_normal_exit. fp1 is used as scratch.
8727 fin_sd_may_ovfl:
8728 	fmov.l		&0x0,%fpsr		# clear FPSR
8729 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8730 
8731 	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
8732 
8733 	fmov.l		%fpsr,%d1		# save status
8734 	fmov.l		&0x0,%fpcr		# clear FPCR
8735 
8736 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8737 
8738 	fabs.x		%fp0,%fp1		# make a copy of result
8739 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8740 	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
8741 
8742 # no, it didn't overflow; we have correct result
8743 	bra.w		fin_sd_normal_exit
8744 
8745 ##########################################################################
8746 
8747 #
8748 # operand is not a NORM: check its optype and branch accordingly
8749 #
# in:  d1 = source optype tag, a0 = ptr to src operand, d0 = rnd prec,mode
# DENORMs go to fin_denorm, SNANs/QNANs to the shared one-operand NAN
# routines; what remains is loaded into fp0 directly.
8750 fin_not_norm:
8751 	cmpi.b		%d1,&DENORM		# weed out DENORM
8752 	beq.w		fin_denorm
8753 	cmpi.b		%d1,&SNAN		# weed out SNANs
8754 	beq.l		res_snan_1op
8755 	cmpi.b		%d1,&QNAN		# weed out QNANs
8756 	beq.l		res_qnan_1op
8757 
8758 #
8759 # do the fmove in; at this point, only possible ops are ZERO and INF.
8760 # use fmov to determine ccodes.
8761 # prec:mode should be zero at this point but it won't affect answer anyways.
8762 #
8763 	fmov.x		SRC(%a0),%fp0		# do fmove in
8764 	fmov.l		%fpsr,%d0		# no exceptions possible
8765 	rol.l		&0x8,%d0		# put ccodes in lo byte
8766 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
8767 	rts
8768 
8769 #########################################################################
8770 # XDEF ****************************************************************	#
8771 #	fdiv(): emulates the fdiv instruction				#
8772 #	fsdiv(): emulates the fsdiv instruction				#
8773 #	fddiv(): emulates the fddiv instruction				#
8774 #									#
8775 # XREF ****************************************************************	#
8776 #	scale_to_zero_src() - scale src exponent to zero		#
8777 #	scale_to_zero_dst() - scale dst exponent to zero		#
8778 #	unf_res() - return default underflow result			#
8779 #	ovf_res() - return default overflow result			#
8780 #	res_qnan() - return QNAN result					#
8781 #	res_snan() - return SNAN result					#
8782 #									#
8783 # INPUT ***************************************************************	#
8784 #	a0 = pointer to extended precision source operand		#
8785 #	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
8787 #									#
8788 # OUTPUT **************************************************************	#
8789 #	fp0 = result							#
8790 #	fp1 = EXOP (if exception occurred)				#
8791 #									#
8792 # ALGORITHM ***********************************************************	#
8793 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
8794 # norms/denorms into ext/sgl/dbl precision.				#
8795 #	For norms/denorms, scale the exponents such that a divide	#
8796 # instruction won't cause an exception. Use the regular fdiv to		#
8797 # compute a result. Check if the regular operands would have taken	#
8798 # an exception. If so, return the default overflow/underflow result	#
8799 # and return the EXOP if exceptions are enabled. Else, scale the	#
8800 # result operand to the proper exponent.				#
8801 #									#
8802 #########################################################################
8803 
	align		0x10
# Scale-factor thresholds consulted by fdiv_norm. Each table holds one
# long per rounding precision; fdiv_norm indexes them with the precision
# field of the rnd info (shifted down to the low bits) times 4.
# A scale factor equal to the tbl_fdiv_unfl entry means "may underflow",
# a greater one means "underflows".
tbl_fdiv_unfl:
	long		0x3fff - 0x0000		# ext_unfl
	long		0x3fff - 0x3f81		# sgl_unfl
	long		0x3fff - 0x3c01		# dbl_unfl

# A scale factor at or below the tbl_fdiv_ovfl entry sends fdiv_norm to
# fdiv_may_ovfl for a closer look.
tbl_fdiv_ovfl:
	long		0x3fff - 0x7ffe		# ext overflow exponent
	long		0x3fff - 0x407e		# sgl overflow exponent
	long		0x3fff - 0x43fe		# dbl overflow exponent
8814 
	global		fsdiv
# fsdiv: force single rounding precision, then share the fdiv path.
fsdiv:
	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fdiv

	global		fddiv
# fddiv: force double rounding precision, then fall through into fdiv.
fddiv:
	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fdiv
# fdiv: d0 = rnd prec,mode; a0 = ptr to src; a1 = ptr to dst.
# Builds a dispatch index (dst tag * 8 + src tag) in d1. An index of
# zero is the NORM / NORM slot of tbl_fdiv_op, and execution simply
# falls through into fdiv_norm; anything else goes via fdiv_not_norm.
fdiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1			# index is consumed as a word
	mov.b		DTAG(%a6),%d1		# d1 = dst tag
	lsl.b		&0x3,%d1		# dst tag picks a group of 8
	or.b		STAG(%a6),%d1		# src tag picks entry in group

	bne.w		fdiv_not_norm		# optimize on non-norm input
8836 
8837 #
8838 # DIVIDE: NORMs and DENORMs ONLY!
8839 #
fdiv_norm:
# Work on private copies of the operands: dst in FP_SCR1, src in FP_SCR0.
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1 (src)

	bsr.l		scale_to_zero_dst	# scale dst exponent; d0 = scale factor 2

# Combine the two factors on the stack: (sp) = -scale1 + scale2.
	neg.l		(%sp)			# (sp) = -scale1
	add.l		%d0,(%sp)		# SCALE FACTOR = scale2 - scale1

# d1 becomes the precision index used for every table lookup below and
# is still relied upon by fdiv_may_ovfl.
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
	ble.w		fdiv_may_ovfl		# S.F. <= threshold: it may; go check

	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
	beq.w		fdiv_may_unfl		# maybe
	bgt.w		fdiv_unfl		# yes; go handle underflow
# otherwise fall through into fdiv_normal with d0 = S.F.
8866 
fdiv_normal:
# No overflow/underflow expected: divide the scaled operands under the
# user's rounding precision/mode and record the resulting status.
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# Common exit: fp0 = scaled result, d0 = S.F. Rebuild the exponent as
# (scaled exponent - S.F.), keep the sign, and reload fp0.
fdiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# store d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling (exp - S.F.)
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
8893 
tbl_fdiv_ovfl2:
# Exponent limits used by fdiv_may_ovfl, indexed by the same precision
# index as tbl_fdiv_ovfl. Each value is one above the exponent that
# appears in the matching tbl_fdiv_ovfl entry.
	long		0x7fff
	long		0x407f
	long		0x43ff

# fdiv_may_ovfl found the unscaled exponent below the limit: pop the
# saved S.F. back into d0 and finish through the normal exit.
fdiv_no_ovfl:
	mov.l		(%sp)+,%d0		# restore scale factor
	bra.b		fdiv_normal_exit
8902 
fdiv_may_ovfl:
# The S.F. alone cannot decide overflow. Do the divide, then compare the
# would-be unscaled exponent against tbl_fdiv_ovfl2.
# On entry d0 = S.F.; d1 is expected to still hold the precision index
# computed in fdiv_norm (it is used for the table lookup below and is
# not reloaded here).
	mov.l		%d0,-(%sp)		# save scale factor

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d0		# d0 = status (S.F. is on the stack)
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d0,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d0		# fetch new exponent
	add.l		&0xc,%sp		# clear result from stack
	andi.l		&0x7fff,%d0		# strip sign
	sub.l		(%sp),%d0		# undo scaling (exp - S.F.)
	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
	blt.b		fdiv_no_ovfl		# below the limit: no overflow
	mov.l		(%sp)+,%d0		# overflow: d0 = S.F. again
# fall through into fdiv_ovfl_tst
8926 
fdiv_ovfl_tst:
# Overflow has been established. Record it, then decide whether an EXOP
# must be produced first (OVFL or INEX enabled) or only the default
# result.
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fdiv_ovfl_ena		# yes

# Default overflow result: ovf_res is given the sign in d1 and prec:rnd
# in d0; its d0 return is OR-ed into the ccodes and a0 is then used as
# the address of the result to load into fp0.
fdiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts
8942 
fdiv_ovfl_ena:
# OVFL or INEX is enabled: build the EXOP in fp1 before producing the
# default result. For sgl/dbl precision the divide is redone first (see
# fdiv_ovfl_ena_sd) so that fp0 holds an extended-precision quotient.
# d0 = S.F. throughout.
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl

# EXOP exponent = (scaled exponent - S.F. - 0x6000), truncated to
# 15 bits, with the original sign re-attached.
fdiv_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# undo scaling (exp - S.F.)
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fdiv_ovfl_dis		# then produce the default result

# sgl/dbl precision: repeat the divide with the precision bits dropped
# (only the rnd mode is kept in the FPCR value) and rejoin the common
# EXOP code. The FPSR is not read back after this second divide.
fdiv_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fdiv_ovfl_ena_cont
8976 
fdiv_unfl:
# The result underflows. Flag UNFL, compute the scaled quotient with
# round-to-zero, then either build an EXOP (UNFL or INEX enabled) or go
# straight to the default underflow result. d0 = S.F. on entry and is
# left untouched until fdiv_unfl_dis calls unf_res.
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fdiv_unfl_ena		# yes

# Default underflow result: hand the quotient (stored in FP_SCR0) to
# unf_res, merge the ccode bits it returns in d0, and reload fp0 from
# FP_SCR0.
fdiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
9005 
9006 #
9007 # UNFL is enabled.
9008 #
fdiv_unfl_ena:
# Build the underflow EXOP in fp1: redo the divide into fp1 (fp0 keeps
# the round-to-zero quotient from fdiv_unfl), then rebias the exponent.
# d0 = S.F.
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fdiv_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR

# EXOP exponent = (scaled exponent - S.F. + 0x6000), truncated to
# 15 bits, with the original sign re-attached. FP_SCR0 is reused as
# scratch here; fdiv_unfl_dis overwrites it again with fp0.
	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling (exp - S.F.)
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear sign bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fdiv_unfl_dis		# then produce the default result

# sgl/dbl precision: the EXOP divide uses only the rnd mode bits (the
# precision bits are dropped from the FPCR value).
fdiv_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fdiv_unfl_ena_cont
9046 
9047 #
9048 # the divide operation MAY underflow:
9049 #
fdiv_may_unfl:
# The S.F. equals the underflow threshold, so the outcome depends on the
# magnitude of the scaled quotient relative to 1. d0 = S.F.
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# compare |result| with 1
	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fdiv_normal_exit	# no; no underflow occurred
	bra.w		fdiv_unfl		# yes; underflow occurred
9091 
9092 ############################################################################
9093 
9094 #
9095 # Divide: inputs are not both normalized; what are they?
9096 #
fdiv_not_norm:
# d1 = (dst tag << 3) | src tag, built in fdiv. Fetch the 16-bit handler
# offset for that pair and jump to tbl_fdiv_op + offset.
	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)

# Dispatch table: six groups (one per dst type) of eight shorts (one per
# src type). Entry comments read "dst type / src type". The last two
# slots of every group are fillers holding offset 0.
	swbeg		&48
tbl_fdiv_op:
	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #
9156 
# Local trampolines: tbl_fdiv_op stores 16-bit offsets, so the shared
# result routines are reached through these long branches.
fdiv_res_qnan:
	bra.l		res_qnan
fdiv_res_snan:
	bra.l		res_snan
fdiv_res_operr:
	bra.l		res_operr
9163 
9164 	global		fdiv_zero_load		# global for fsgldiv
9165 fdiv_zero_load:
9166 	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
9167 	mov.b		DST_EX(%a1),%d1		# or of input signs.
9168 	eor.b		%d0,%d1
9169 	bpl.b		fdiv_zero_load_p	# result is positive
9170 	fmov.s		&0x80000000,%fp0	# load a -ZERO
9171 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
9172 	rts
9173 fdiv_zero_load_p:
9174 	fmov.s		&0x00000000,%fp0	# load a +ZERO
9175 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
9176 	rts
9177 
9178 #
9179 # The destination was In Range and the source was a ZERO. The result,
9180 # Therefore, is an INF w/ the proper sign.
9181 # So, determine the sign and return a new INF (w/ the j-bit cleared).
9182 #
9183 	global		fdiv_inf_load		# global for fsgldiv
9184 fdiv_inf_load:
9185 	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
9186 	mov.b		SRC_EX(%a0),%d0		# load both signs
9187 	mov.b		DST_EX(%a1),%d1
9188 	eor.b		%d0,%d1
9189 	bpl.b		fdiv_inf_load_p		# result is positive
9190 	fmov.s		&0xff800000,%fp0	# make result -INF
9191 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9192 	rts
9193 fdiv_inf_load_p:
9194 	fmov.s		&0x7f800000,%fp0	# make result +INF
9195 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
9196 	rts
9197 
9198 #
9199 # The destination was an INF w/ an In Range or ZERO source, the result is
9200 # an INF w/ the proper sign.
9201 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9202 # dst INF is set, then then j-bit of the result INF is also set).
9203 #
9204 	global		fdiv_inf_dst		# global for fsgldiv
9205 fdiv_inf_dst:
9206 	mov.b		DST_EX(%a1),%d0		# load both signs
9207 	mov.b		SRC_EX(%a0),%d1
9208 	eor.b		%d0,%d1
9209 	bpl.b		fdiv_inf_dst_p		# result is positive
9210 
9211 	fmovm.x		DST(%a1),&0x80		# return result in fp0
9212 	fabs.x		%fp0			# clear sign bit
9213 	fneg.x		%fp0			# set sign bit
9214 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9215 	rts
9216 
9217 fdiv_inf_dst_p:
9218 	fmovm.x		DST(%a1),&0x80		# return result in fp0
9219 	fabs.x		%fp0			# return positive INF
9220 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
9221 	rts
9222 
9223 #########################################################################
9224 # XDEF ****************************************************************	#
9225 #	fneg(): emulates the fneg instruction				#
9226 #	fsneg(): emulates the fsneg instruction				#
9227 #	fdneg(): emulates the fdneg instruction				#
9228 #									#
9229 # XREF ****************************************************************	#
9230 #	norm() - normalize a denorm to provide EXOP			#
9231 #	scale_to_zero_src() - scale sgl/dbl source exponent		#
9232 #	ovf_res() - return default overflow result			#
9233 #	unf_res() - return default underflow result			#
9234 #	res_qnan_1op() - return QNAN result				#
9235 #	res_snan_1op() - return SNAN result				#
9236 #									#
9237 # INPUT ***************************************************************	#
9238 #	a0 = pointer to extended precision source operand		#
9239 #	d0 = rnd prec,mode						#
9240 #									#
9241 # OUTPUT **************************************************************	#
9242 #	fp0 = result							#
9243 #	fp1 = EXOP (if exception occurred)				#
9244 #									#
9245 # ALGORITHM ***********************************************************	#
9246 #	Handle NANs, zeroes, and infinities as special cases. Separate	#
9247 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
9248 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
9249 # and an actual fneg performed to see if overflow/underflow would have	#
9250 # occurred. If so, return default underflow/overflow result. Else,	#
9251 # scale the result exponent and return result. FPSR gets set based on	#
9252 # the result value.							#
9253 #									#
9254 #########################################################################
9255 
	global		fsneg
# fsneg: force single rounding precision, then share the fneg path.
fsneg:
	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fneg

	global		fdneg
# fdneg: force double rounding precision, then fall through into fneg.
fdneg:
	andi.b		&0x30,%d0		# clear rnd prec (keep rnd mode)
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fneg
# fneg: d0 = rnd prec,mode; a0 = ptr to src. A zero source tag falls
# through into fneg_norm; any other tag is sorted out in fneg_not_norm
# (d1 carries the tag there).
fneg:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1		# d1 = source operand tag
	bne.w		fneg_not_norm		# optimize on non-norm input
9272 
9273 #
9274 # NEGATE SIGN : norms and denorms ONLY!
9275 #
fneg_norm:
# d0 still holds the rnd info; after the mask its low byte holds only
# the precision bits, which fneg_not_ext inspects.
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fneg_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
# The mantissa is copied unchanged and the sign bit of the exponent word
# is flipped. Only the 'N' ccode is ever written on this path; a
# positive result leaves FPSR_CC as it was.
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_norm_load		# sign is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
fneg_norm_load:
	mov.w		%d0,FP_SCR0_EX(%a6)	# store negated {sgn,exp}
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
9295 
9296 #
9297 # for an extended precision DENORM, the UNFL exception bit is set
9298 # the accrued bit is NOT set in this instance(no inexactness!)
9299 #
fneg_denorm:
# Reached from fneg_not_norm with d0 = rnd info. Extended precision is
# handled here; sgl/dbl denorms take the scaled path in fneg_not_ext.
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fneg_not_ext		# no; go handle sgl or dbl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# Copy the mantissa, flip the sign, and return the negated denorm in fp0.
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_denorm_done	# result is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# result is negative; set 'N' ccode bit
fneg_denorm_done:
	mov.w		%d0,FP_SCR0_EX(%a6)	# store negated {sgn,exp}
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fneg_ext_unfl_ena	# yes; an EXOP is needed too
	rts
9319 
9320 #
9321 # the input is an extended DENORM and underflow is enabled in the FPCR.
9322 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323 # exponent and insert back into the operand.
9324 #
fneg_ext_unfl_ena:
# Build the EXOP for an extended-precision denorm: normalize the value
# held in FP_SCR0, then give it the exponent (0x6000 - shift count)
# under the sign currently stored in FP_SCR0. The EXOP is returned in
# fp1; fp0 is not touched here.
	lea		FP_SCR0(%a6),%a0	# a0 -> operand to normalize
	bsr.l		norm			# normalize; d0 = shift value
	mov.w		FP_SCR0_EX(%a6),%d1	# d1 = current {sgn,exp}
	andi.w		&0x8000,%d1		# isolate the sign
	neg.w		%d0			# exponent = -(shift value)
	addi.w		&0x6000,%d0		# apply the 0x6000 bias
	andi.w		&0x7fff,%d0		# keep bit 15 free for the sign
	or.w		%d1,%d0			# merge sign with new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# write {sgn,exp} back
	fmovm.x		FP_SCR0(%a6),&0x40	# fp1 = EXOP
	rts
9337 
9338 #
9339 # operand is either single or double
9340 #
fneg_not_ext:
# d0.b holds only the precision bits here (masked with 0xc0 by the
# caller paths fneg_norm / fneg_denorm).
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fneg_dbl

#
# operand is to be rounded to single precision
#
fneg_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

# d0 = scale factor. Compare it against the single-precision limits.
	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fneg_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# Common exit: fp0 = scaled result, d0 = scale factor. Rebuild the
# exponent as (scaled exponent - scale factor) under the result's sign.
fneg_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# undo scaling (exp - scale factor)
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
9387 
9388 #
9389 # operand is to be rounded to double precision
9390 #
fneg_dbl:
# Same scheme as fneg_sgl, using the double-precision limits.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
	bra.w		fneg_sd_normal		# no; go handle normalized op
9403 
9404 #
9405 # operand WILL underflow when moved in to the fp register file
9406 #
fneg_sd_unfl:
# d0 = scale factor. The negation is done by hand on the scaled copy in
# FP_SCR0: flip the top bit of the exponent word's high byte and set 'N'
# if the result is negative.
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
	bpl.b		fneg_sd_unfl_tst	# result is positive
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fneg_sd_unfl_ena	# yes

# Default underflow result: unf_res works on FP_SCR0; the ccode bits it
# returns in d0 are merged and fp0 is loaded from FP_SCR0.
fneg_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
9427 
9428 #
9429 # operand will underflow AND underflow is enabled.
9430 # Therefore, we must return the result rounded to extended precision.
9431 #
fneg_sd_unfl_ena:
# Build the EXOP in FP_SCR1 so that FP_SCR0 stays intact for unf_res:
# same mantissa, exponent = (scaled exponent - scale factor + 0x6000)
# truncated to 15 bits, sign taken from the already negated FP_SCR0.
# d0 = scale factor.
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear sign bit
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_unfl_dis	# then produce the default result
9449 
9450 #
9451 # operand WILL overflow.
9452 #
fneg_sd_ovfl:
# d0 = scale factor. Negate the scaled copy under the user's rounding
# precision/mode so that INEX2 and N get recorded, then treat the result
# as an overflow.
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fneg_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fneg_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
# ovf_res is given the sign in d1 and prec,mode in d0; its d0 return is
# OR-ed into the ccodes and a0 is then used as the address of the result.
fneg_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts
9483 
9484 #
9485 # OVFL is enabled.
9486 # the INEX2 bit has already been updated by the round to the correct precision.
9487 # now, round to extended(and don't alter the FPSR).
9488 #
fneg_sd_ovfl_ena:
# Build the EXOP in fp1 from the operand image in FP_SCR0:
# exponent = (stored exponent - scale factor - 0x6000) truncated to
# 15 bits, under the sign found in FP_SCR0_EX. d0 = scale factor.
# NOTE(review): fp0 (the negated result) is not stored back to FP_SCR0
# on this path, so the sign used here is the one already in FP_SCR0 —
# confirm this is the intended EXOP sign.
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling (exp - scale factor)
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_ovfl_dis	# then produce the default result
9503 
9504 #
9505 # the move in MAY underflow. so...
9506 #
9507 fneg_sd_may_ovfl:
9508 	fmov.l		&0x0,%fpsr		# clear FPSR
9509 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9510 
9511 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9512 
9513 	fmov.l		%fpsr,%d1		# save status
9514 	fmov.l		&0x0,%fpcr		# clear FPCR
9515 
9516 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9517 
9518 	fabs.x		%fp0,%fp1		# make a copy of result
9519 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
9520 	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
9521 
9522 # no, it didn't overflow; we have correct result
9523 	bra.w		fneg_sd_normal_exit
9524 
9525 ##########################################################################
9526 
9527 #
9528 # input is not normalized; what is it?
9529 #
fneg_not_norm:
# d1 = source operand tag (loaded in fneg); d0 = rnd info.
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fneg_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# do the fneg; at this point, only possible ops are ZERO and INF.
# use fneg to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
# The hardware FPSR's top byte is rotated into the low byte of d0 and
# then stored, as a whole byte, into the saved ccode byte.
	fneg.x		SRC_EX(%a0),%fp0	# do fneg
	fmov.l		%fpsr,%d0		# d0 = FPSR after the fneg
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts
9548 
9549 #########################################################################
9550 # XDEF ****************************************************************	#
#	ftst(): emulates the ftst instruction				#
9552 #									#
9553 # XREF ****************************************************************	#
9554 #	res{s,q}nan_1op() - set NAN result for monadic instruction	#
9555 #									#
9556 # INPUT ***************************************************************	#
9557 #	a0 = pointer to extended precision source operand		#
9558 #									#
9559 # OUTPUT **************************************************************	#
9560 #	none								#
9561 #									#
9562 # ALGORITHM ***********************************************************	#
#	Check the source operand tag (STAG) and set the FPSR according	#
9564 # to the operand type and sign.						#
9565 #									#
9566 #########################################################################
9567 
9568 	global		ftst
9569 ftst:
9570 	mov.b		STAG(%a6),%d1
9571 	bne.b		ftst_not_norm		# optimize on non-norm input
9572 
9573 #
9574 # Norm:
9575 #
9576 ftst_norm:
9577 	tst.b		SRC_EX(%a0)		# is operand negative?
9578 	bmi.b		ftst_norm_m		# yes
9579 	rts
9580 ftst_norm_m:
9581 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9582 	rts
9583 
9584 #
9585 # input is not normalized; what is it?
9586 #
9587 ftst_not_norm:
9588 	cmpi.b		%d1,&ZERO		# weed out ZERO
9589 	beq.b		ftst_zero
9590 	cmpi.b		%d1,&INF		# weed out INF
9591 	beq.b		ftst_inf
9592 	cmpi.b		%d1,&SNAN		# weed out SNAN
9593 	beq.l		res_snan_1op
9594 	cmpi.b		%d1,&QNAN		# weed out QNAN
9595 	beq.l		res_qnan_1op
9596 
9597 #
9598 # Denorm:
9599 #
9600 ftst_denorm:
9601 	tst.b		SRC_EX(%a0)		# is operand negative?
9602 	bmi.b		ftst_denorm_m		# yes
9603 	rts
9604 ftst_denorm_m:
9605 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9606 	rts
9607 
9608 #
9609 # Infinity:
9610 #
9611 ftst_inf:
9612 	tst.b		SRC_EX(%a0)		# is operand negative?
9613 	bmi.b		ftst_inf_m		# yes
9614 ftst_inf_p:
9615 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9616 	rts
9617 ftst_inf_m:
9618 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9619 	rts
9620 
9621 #
9622 # Zero:
9623 #
9624 ftst_zero:
9625 	tst.b		SRC_EX(%a0)		# is operand negative?
9626 	bmi.b		ftst_zero_m		# yes
9627 ftst_zero_p:
9628 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9629 	rts
9630 ftst_zero_m:
9631 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
9632 	rts
9633 
9634 #########################################################################
9635 # XDEF ****************************************************************	#
9636 #	fint(): emulates the fint instruction				#
9637 #									#
9638 # XREF ****************************************************************	#
9639 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9640 #									#
9641 # INPUT ***************************************************************	#
9642 #	a0 = pointer to extended precision source operand		#
9643 #	d0 = round precision/mode					#
9644 #									#
9645 # OUTPUT **************************************************************	#
9646 #	fp0 = result							#
9647 #									#
9648 # ALGORITHM ***********************************************************	#
9649 #	Separate according to operand type. Unnorms don't pass through	#
9650 # here. For norms, load the rounding mode/prec, execute a "fint", then	#
9651 # store the resulting FPSR bits.					#
9652 #	For denorms, force the j-bit to a one and do the same as for	#
9653 # norms. Denorms are so low that the answer will either be a zero or a	#
9654 # one.									#
9655 #	For zeroes/infs/NANs, return the same while setting the FPSR	#
9656 # as appropriate.							#
9657 #									#
9658 #########################################################################
9659 
9660 	global		fint
# fint: entry point. the source tag in STAG(%a6) is zero for a NORM, which
# falls straight through into fint_norm; any other tag is sorted out at
# fint_not_norm (d1 carries the tag there).
fint:
	mov.b		STAG(%a6),%d1
	bne.b		fint_not_norm		# optimize on non-norm input

#
# Norm:
#
# the 0x30 mask keeps only the rounding mode bits of d0, so the precision
# field becomes zero (extended). the FPSR is cleared before the fint so
# that the value read back afterwards comes from this instruction alone;
# that value is OR'ed into USER_FPSR(%a6). d0 is overwritten on exit.
# fint_denorm also branches here, with a0 re-pointed at FP_SCR0.
#
fint_norm:
	andi.b		&0x30,%d0		# set prec = ext

	fmov.l		%d0,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts
9681 
9682 #
9683 # input is not normalized; what is it?
9684 #
# fint_not_norm: d1 holds the non-zero source tag loaded at fint.
# ZERO, INF and DENORM are handled locally; SNAN goes to res_snan_1op and
# whatever tag is left over is treated as a QNAN (no explicit compare).
fint_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fint_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fint_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fint_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN
9695 
9696 #
9697 # Denorm:
9698 #
9699 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700 # also, the INEX2 and AINEX exception bits will be set.
9701 # so, we could either set these manually or force the DENORM
9702 # to a very small NORM and ship it to the NORM routine.
9703 # I do the latter.
9704 #
# fint_denorm: build a stand-in operand in FP_SCR0 and reuse fint_norm.
# only the sign/exponent word and the top mantissa byte are written here;
# the other mantissa bytes of FP_SCR0 keep whatever they held. with a
# zero exponent and the top mantissa bit set the stand-in is a tiny
# non-zero NORM in any case.
fint_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# a0 = ptr to the stand-in operand
	bra.b		fint_norm		# d0 (rnd info) is still intact
9710 
9711 #
9712 # Zero:
9713 #
# fint_zero: fint of a ZERO is that same ZERO. hand back a zero of the
# operand's sign in fp0 and record 'Z' (plus 'N' for -0) in the
# condition code byte FPSR_CC(%a6).
fint_zero:
	tst.b		SRC_EX(%a0)		# sign bit of the ZERO clear?
	bpl.b		fint_zero_p		# yes; operand is +0
fint_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # ccodes = 'Z','N'
	fmov.s		&0x80000000,%fp0	# fp0 = -ZERO
	rts
fint_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccode = 'Z'
	fmov.s		&0x00000000,%fp0	# fp0 = +ZERO
	rts
9725 
9726 #
9727 # Infinity:
9728 #
# fint_inf: fint of an INF is that same INF. the operand is loaded into
# fp0 unchanged and 'I' (plus 'N' for -INF) is recorded in the condition
# code byte FPSR_CC(%a6).
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# fp0 = the INF operand itself
	tst.b		SRC_EX(%a0)		# sign bit of the INF clear?
	bpl.b		fint_inf_p		# yes; operand is +INF
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # ccodes = 'N','I'
	rts
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# ccode = 'I'
	rts
9739 
9740 #########################################################################
9741 # XDEF ****************************************************************	#
9742 #	fintrz(): emulates the fintrz instruction			#
9743 #									#
9744 # XREF ****************************************************************	#
9745 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9746 #									#
9747 # INPUT ***************************************************************	#
9748 #	a0 = pointer to extended precision source operand		#
9749 #	d0 = round precision/mode					#
9750 #									#
9751 # OUTPUT **************************************************************	#
9752 #	fp0 = result							#
9753 #									#
9754 # ALGORITHM ***********************************************************	#
9755 #	Separate according to operand type. Unnorms don't pass through	#
9756 # here. For norms, load the rounding mode/prec, execute a "fintrz",	#
9757 # then store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so small that truncating toward zero always gives	#
# a zero of the operand's sign.						#
9761 #	For zeroes/infs/NANs, return the same while setting the FPSR	#
9762 # as appropriate.							#
9763 #									#
9764 #########################################################################
9765 
9766 	global		fintrz
# fintrz: entry point. the source tag in STAG(%a6) is zero for a NORM,
# which falls straight through into fintrz_norm; any other tag is sorted
# out at fintrz_not_norm (d1 carries the tag there).
fintrz:
	mov.b		STAG(%a6),%d1
	bne.b		fintrz_not_norm		# optimize on non-norm input

#
# Norm:
#
# unlike fint_norm, this path does not load the FPCR: the rnd info passed
# in d0 is not used and d0 is simply overwritten with the FPSR.
# the FPSR is cleared first so that the value read back afterwards comes
# from the fintrz alone; it is OR'ed into USER_FPSR(%a6).
# fintrz_denorm also branches here, with a0 re-pointed at FP_SCR0.
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts
9783 
9784 #
9785 # input is not normalized; what is it?
9786 #
# fintrz_not_norm: d1 holds the non-zero source tag loaded at fintrz.
# ZERO, INF and DENORM are handled locally; SNAN goes to res_snan_1op and
# whatever tag is left over is treated as a QNAN (no explicit compare).
fintrz_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fintrz_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fintrz_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN
9797 
9798 #
9799 # Denorm:
9800 #
9801 # for DENORMs, the result will be (+/-)ZERO.
9802 # also, the INEX2 and AINEX exception bits will be set.
9803 # so, we could either set these manually or force the DENORM
9804 # to a very small NORM and ship it to the NORM routine.
9805 # I do the latter.
9806 #
# fintrz_denorm: build a stand-in operand in FP_SCR0 and reuse
# fintrz_norm. only the sign/exponent word and the top mantissa byte are
# written here; the other mantissa bytes of FP_SCR0 keep whatever they
# held. with a zero exponent and the top mantissa bit set the stand-in is
# a tiny non-zero NORM in any case.
fintrz_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# a0 = ptr to the stand-in operand
	bra.b		fintrz_norm
9812 
9813 #
9814 # Zero:
9815 #
# fintrz_zero: fintrz of a ZERO is that same ZERO. hand back a zero of
# the operand's sign in fp0 and record 'Z' (plus 'N' for -0) in the
# condition code byte FPSR_CC(%a6).
fintrz_zero:
	tst.b		SRC_EX(%a0)		# sign bit of the ZERO clear?
	bpl.b		fintrz_zero_p		# yes; operand is +0
fintrz_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # ccodes = 'Z','N'
	fmov.s		&0x80000000,%fp0	# fp0 = -ZERO
	rts
fintrz_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccode = 'Z'
	fmov.s		&0x00000000,%fp0	# fp0 = +ZERO
	rts
9827 
9828 #
9829 # Infinity:
9830 #
# fintrz_inf: fintrz of an INF is that same INF. the operand is loaded
# into fp0 unchanged and 'I' (plus 'N' for -INF) is recorded in the
# condition code byte FPSR_CC(%a6).
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# fp0 = the INF operand itself
	tst.b		SRC_EX(%a0)		# sign bit of the INF clear?
	bpl.b		fintrz_inf_p		# yes; operand is +INF
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # ccodes = 'N','I'
	rts
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# ccode = 'I'
	rts
9841 
9842 #########################################################################
9843 # XDEF ****************************************************************	#
9844 #	fabs():  emulates the fabs instruction				#
9845 #	fsabs(): emulates the fsabs instruction				#
9846 #	fdabs(): emulates the fdabs instruction				#
9847 #									#
9848 # XREF **************************************************************** #
9849 #	norm() - normalize denorm mantissa to provide EXOP		#
9850 #	scale_to_zero_src() - make exponent. = 0; get scale factor	#
9851 #	unf_res() - calculate underflow result				#
9852 #	ovf_res() - calculate overflow result				#
9853 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9854 #									#
9855 # INPUT *************************************************************** #
9856 #	a0 = pointer to extended precision source operand		#
9857 #	d0 = rnd precision/mode						#
9858 #									#
9859 # OUTPUT ************************************************************** #
9860 #	fp0 = result							#
9861 #	fp1 = EXOP (if exception occurred)				#
9862 #									#
9863 # ALGORITHM ***********************************************************	#
9864 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
9865 # norms into extended, single, and double precision.			#
9866 #	Simply clear sign for extended precision norm. Ext prec denorm	#
9867 # gets an EXOP created for it since it's an underflow.			#
9868 #	Double and single precision can overflow and underflow. First,	#
9869 # scale the operand such that the exponent is zero. Perform an "fabs"	#
9870 # using the correct rnd mode/prec. Check to see if the original		#
9871 # exponent would take an exception. If so, use unf_res() or ovf_res()	#
9872 # to calculate the default result. Also, create the EXOP for the	#
9873 # exceptional case. If no exception should occur, insert the correct	#
9874 # result exponent and return.						#
9875 #	Unnorms don't pass through here.				#
9876 #									#
9877 #########################################################################
9878 
9879 	global		fsabs
# fsabs/fdabs: keep the rounding mode bits of d0 (mask 0x30), insert the
# single/double precision code, then continue as fabs. fsabs branches to
# fabs; fdabs simply falls through into it.
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

	global		fdabs
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fabs
# fabs: common entry. the rnd prec/mode is parked in L_SCR3(%a6) because
# later stages reload it from there after d0 has been reused.
# a zero source tag (NORM) falls through into fabs_norm; d1 carries the
# tag to fabs_not_norm otherwise.
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fabs_not_norm		# optimize on non-norm input
9895 
9896 #
9897 # ABSOLUTE VALUE: norms and denorms ONLY!
9898 #
# fabs_norm: d0 still holds the rnd prec/mode. the 0xc0 mask isolates the
# precision field; zero means extended, anything else goes to
# fabs_not_ext with the precision bits left in d0.
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
# the operand is copied to FP_SCR0 with bit 15 of the sign/exponent word
# (the sign) cleared, and loaded into fp0 from there. no FPSR bits are
# written on this path.
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
9915 
9916 #
9917 # for an extended precision DENORM, the UNFL exception bit is set
9918 # the accrued bit is NOT set in this instance(no inexactness!)
9919 #
# fabs_denorm: reached from fabs_not_norm with d0 = rnd prec/mode.
# for sgl/dbl precision the DENORM takes the same scaled path as a NORM
# (fabs_not_ext). for extended precision the UNFL exception bit is set,
# the sign-cleared operand is returned in fp0, and an EXOP is built only
# when UNFL is enabled in FPCR_ENABLE(%a6).
fabs_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena
	rts
9937 
9938 #
9939 # the input is an extended DENORM and underflow is enabled in the FPCR.
9940 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941 # exponent and insert back into the operand.
9942 #
# fabs_ext_unfl_ena: build the EXOP for an extended DENORM in fp1.
# FP_SCR0 already holds the sign-cleared operand (written at fabs_denorm).
# norm() is called with a0 = FP_SCR0 and its d0 result is used as the
# shift count; the new exponent is (0x6000 - shift) masked to 15 bits,
# combined with the sign bit currently in FP_SCR0_EX.
fabs_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
9955 
9956 #
9957 # operand is either single or double
9958 #
# fabs_not_ext: d0 holds only the precision bits here (masked with 0xc0
# by fabs_norm/fabs_denorm). single precision falls through to fabs_sgl,
# everything else is handled as double at fabs_dbl.
fabs_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fabs_dbl

#
# operand is to be rounded to single precision
#
# the operand is copied to FP_SCR0 (sign included) and
# scale_to_zero_src() is called; its d0 result is used as the scale
# factor from here on. the limits are written as 0x3fff-<exp>, which
# reads as "scale factor = 0x3fff - original exponent" -- presumably the
# contract of scale_to_zero_src(); confirm against that routine.
#   d0 >= 0x3fff-0x3f80 : exponent too small for sgl -> underflow
#   d0 == 0x3fff-0x407e : largest sgl exponent -> may overflow on round
#   d0 <  0x3fff-0x407e : exponent too large for sgl -> overflow
# otherwise fall through into fabs_sd_normal.
#
fabs_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
9977 
9978 #
9979 # operand will NOT overflow or underflow when moved in to the fp reg file
9980 #
# fabs_sd_normal: execute the fabs on the scaled operand in FP_SCR0 with
# the user's rnd prec/mode (reloaded from L_SCR3), and OR the resulting
# FPSR into USER_FPSR(%a6). d0 (scale factor) is left untouched.
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# fabs_sd_normal_exit: undo the scaling. fp0 is written out to FP_SCR0,
# the scale factor in d0 is subtracted from its 15-bit exponent, the sign
# bit is kept, and the value is reloaded into fp0. d2 is used as a
# temporary and is preserved on the stack. also entered from
# fabs_sd_may_ovfl when no overflow occurred.
fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10005 
10006 #
10007 # operand is to be rounded to double precision
10008 #
# fabs_dbl: same classification as fabs_sgl, using the double precision
# exponent limits (0x3c00 and 0x43fe). the operand is copied to FP_SCR0
# (sign included) and d0 becomes the scale factor returned by
# scale_to_zero_src().
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op
10021 
10022 #
10023 # operand WILL underflow when moved in to the fp register file
10024 #
# fabs_sd_unfl: sgl/dbl result underflows. the UNFL exception bit is set
# and the sign is cleared directly in memory: bit 7 of the first byte of
# FP_SCR0_EX is the sign bit of the operand copy.
# d0 still holds the scale factor for fabs_sd_unfl_ena.
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

# fabs_sd_unfl_dis: produce the default underflow result. unf_res() is
# given a0 = FP_SCR0 and d1 = rnd prec/mode; the byte it returns in d0
# is OR'ed into the condition codes and FP_SCR0 is then loaded as the
# result. fabs_sd_unfl_ena rejoins here after building the EXOP.
fabs_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
10042 
10043 #
10044 # operand will underflow AND underflow is enabled.
10045 # Therefore, we must return the result rounded to extended precision.
10046 #
# fabs_sd_unfl_ena: build the EXOP in FP_SCR1 and return it in fp1.
# the mantissa is copied from FP_SCR0 (sign already cleared there by
# fabs_sd_unfl); the exponent is (exp - scale factor + 0x6000) masked to
# 15 bits, with d0 still holding the scale factor. FP_SCR0 is left
# untouched so that fabs_sd_unfl_dis can compute the default result.
# d2 is used as a temporary and is preserved on the stack.
fabs_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# wrap to a 15-bit exponent
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis
10064 
10065 #
10066 # operand WILL overflow.
10067 #
# fabs_sd_ovfl: sgl/dbl result overflows. the fabs is still executed on
# the scaled operand with the user's rnd prec/mode so that the FPSR bits
# it produces can be OR'ed into USER_FPSR(%a6).
fabs_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# fabs_sd_ovfl_tst: common overflow tail; fabs_sd_may_ovfl also enters
# here once it has detected an overflow. falls through into
# fabs_sd_ovfl_dis when neither OVFL nor INEX is enabled.
fabs_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes
10085 
10086 #
10087 # OVFL is not enabled; therefore, we must create the default result by
10088 # calling ovf_res().
10089 #
# fabs_sd_ovfl_dis: produce the default overflow result via ovf_res().
# passed: d1 = sign flag (0xff when the 'N' ccode bit is set, else 0x00)
#         d0 = rnd prec/mode reloaded from L_SCR3 (scale factor is
#              no longer needed)
# used on return: d0 = ccode bits to OR in, a0 = ptr to the result.
# fabs_sd_ovfl_ena rejoins here after building the EXOP.
fabs_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts
10098 
10099 #
10100 # OVFL is enabled.
10101 # the INEX2 bit has already been updated by the round to the correct precision.
10102 # now, round to extended(and don't alter the FPSR).
10103 #
# fabs_sd_ovfl_ena: build the EXOP in place in FP_SCR0 and return it in
# fp1. the exponent is (exp - scale factor - 0x6000) masked to 15 bits,
# with d0 still holding the scale factor. d2 is used as a temporary and
# is preserved on the stack.
#
# NOTE(review): FP_SCR0 still holds the scaled copy of the *source* here
# (the fabs result lives only in fp0), and the sign bit of that copy is
# kept. the EXOP therefore carries the source's sign and unrounded
# mantissa. the underflow path clears the sign first (fabs_sd_unfl);
# this path does not. confirm whether that is intended for fabs.
fabs_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# wrap to a 15-bit exponent
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis
10118 
10119 #
# the move in MAY overflow. so...
10121 #
# fabs_sd_may_ovfl: the operand sits at the largest sgl/dbl exponent, so
# overflow happens only if rounding carries the scaled result up to 2.
# the fabs is executed with the user's rnd prec/mode, the FPSR is OR'ed
# into USER_FPSR(%a6), and |result| is compared against 2:
#   >= 2 : overflow; join the common tail at fabs_sd_ovfl_tst
#   <  2 : no overflow; unscale and return via fabs_sd_normal_exit
# fp1 is used as a scratch register for the compare.
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit
10139 
10140 ##########################################################################
10141 
10142 #
10143 # input is not normalized; what is it?
10144 #
# fabs_not_norm: d1 holds the non-zero source tag loaded at fabs.
# DENORMs and NANs are dispatched first. only ZERO and INF remain after
# that; the absolute value is taken by the hardware fabs and the
# condition code byte FPSR_CC(%a6) is set to 'Z' or 'I' (never 'N').
# any tag that is not INF at the final compare is treated as ZERO.
fabs_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fabs_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

	fabs.x		SRC(%a0),%fp0		# force absolute value

	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fabs_inf
fabs_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fabs_inf:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
10163 
10164 #########################################################################
10165 # XDEF ****************************************************************	#
10166 #	fcmp(): fp compare op routine					#
10167 #									#
10168 # XREF ****************************************************************	#
10169 #	res_qnan() - return QNAN result					#
10170 #	res_snan() - return SNAN result					#
10171 #									#
10172 # INPUT ***************************************************************	#
10173 #	a0 = pointer to extended precision source operand		#
10174 #	a1 = pointer to extended precision destination operand		#
10175 #	d0 = round prec/mode						#
10176 #									#
10177 # OUTPUT ************************************************************** #
10178 #	None								#
10179 #									#
10180 # ALGORITHM ***********************************************************	#
10181 #	Handle NANs and denorms as special cases. For everything else,	#
10182 # just use the actual fcmp instruction to produce the correct condition	#
10183 # codes.								#
10184 #									#
10185 #########################################################################
10186 
10187 	global		fcmp
# fcmp: entry point. d1 is built as (DTAG << 3) | STAG, i.e. the index
# into tbl_fcmp_op (8 entries per destination tag). an index of zero
# means both operands are NORMs and falls through into fcmp_norm.
# the rnd prec/mode passed in d0 is not used; d0 is overwritten.
fcmp:
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
# the dst is loaded into fp0 (clobbering it) and compared against the
# src with the hardware fcmp. the FPSR is read back and rotated left by
# 8 so that its top byte lands in the low byte of d0, which is stored as
# the condition code byte FPSR_CC(%a6). the DENORM helpers below branch
# here after re-pointing a0 and/or a1 at a corrected copy.
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts
10208 
10209 #
10210 # fcmp: inputs are not both normalized; what are they?
10211 #
# fcmp_not_norm: two-step table dispatch. d1 = (DTAG << 3) | STAG on
# entry. the first instruction fetches the 16-bit entry for that index;
# the second jumps to tbl_fcmp_op + entry, since every entry is stored as
# (handler - tbl_fcmp_op).
fcmp_not_norm:
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

# tbl_fcmp_op: 6 rows of 8 entries, 48 in total. the row is the
# destination tag and the column the source tag; each entry comment reads
# "DST - SRC". the tag order implied by the rows and columns is
# NORM, ZERO, INF, QNAN, DENORM, SNAN. the last two columns of every row
# are unused filler (offset zero) that pads each row to 8 entries so the
# shift-by-3 in fcmp works.
	swbeg		&48
tbl_fcmp_op:
	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
	short		fcmp_norm	- tbl_fcmp_op # INF - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #
10271 
10272 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10273 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
# fcmp_res_qnan / fcmp_res_snan: call the generic NAN handler, then clear
# bit 3 of the condition code byte (mask 0xf7), which is the 'N' bit that
# must not be reported by fcmp.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# squelch 'N'
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# squelch 'N'
	rts
10282 
10283 #
10284 # DENORMs are a little more difficult.
10285 # If you have a 2 DENORMs, then you can just force the j-bit to a one
10286 # and use the fcmp_norm routine.
10287 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288 # and use the fcmp_norm routine.
10289 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291 # (1) signs are (+) and the DENORM is the dst or
10292 # (2) signs are (-) and the DENORM is the src
10293 #
10294 
# fcmp_dnrm_s: the src operand is a DENORM. a copy is built in FP_SCR0
# with the top mantissa bit (the j-bit) forced to one, a0 is re-pointed
# at the copy, and fcmp_norm does the compare. also reached from
# fcmp_nrm_dnrm when the operands have opposite signs.
fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to corrected src
	bra.w		fcmp_norm
10303 
# fcmp_dnrm_d: the dst operand is a DENORM. a copy is built in FP_SCR0
# with the top mantissa bit (the j-bit) forced to one, a1 is re-pointed
# at the copy, and fcmp_norm does the compare. also reached from
# fcmp_dnrm_nrm when the operands have opposite signs.
#
# the sign/exponent field is one word, so it is copied with mov.w just
# as fcmp_dnrm_s and fcmp_dnrm_sd do. (the previous mov.l also dragged
# the two bytes that follow the field into FP_SCR0; the instruction
# length is the same, so no branch displacement changes.)
fcmp_dnrm_d:
	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # copy {sgn,exp} word only
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1	# pass: ptr to corrected dst
	bra.w		fcmp_norm
10312 
# fcmp_dnrm_sd: both operands are DENORMs. the dst is copied to FP_SCR1
# and the src to FP_SCR0, each with the top mantissa bit (the j-bit)
# forced to one; a1/a0 are re-pointed at the copies and fcmp_norm does
# the compare.
fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1	# pass: ptr to corrected dst
	lea		FP_SCR0(%a6),%a0	# pass: ptr to corrected src
	bra.w		fcmp_norm
10327 
# fcmp_nrm_dnrm: dst is a NORM, src is a DENORM (table entry
# "NORM - DENORM"). the top bits of the first exponent bytes are XOR'ed
# to compare signs; with opposite signs the src is corrected and the
# hardware compare is used (fcmp_dnrm_s).
# with like signs the answer is known without comparing: 'N' is set only
# when both are negative. in the positive case nothing is written, so
# FPSR_CC(%a6) is presumably already clear on entry -- confirm at the
# caller.
fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
10341 
# fcmp_dnrm_nrm: dst is a DENORM, src is a NORM (table entry
# "DENORM - NORM"). the top bits of the first exponent bytes are XOR'ed
# to compare signs; with opposite signs the dst is corrected and the
# hardware compare is used (fcmp_dnrm_d).
# with like signs the answer is known without comparing: 'N' is set only
# when both are positive. in the negative case nothing is written, so
# FPSR_CC(%a6) is presumably already clear on entry -- confirm at the
# caller.
fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
10355 
10356 #########################################################################
10357 # XDEF ****************************************************************	#
10358 #	fsglmul(): emulates the fsglmul instruction			#
10359 #									#
10360 # XREF ****************************************************************	#
10361 #	scale_to_zero_src() - scale src exponent to zero		#
10362 #	scale_to_zero_dst() - scale dst exponent to zero		#
10363 #	unf_res4() - return default underflow result for sglop		#
10364 #	ovf_res() - return default overflow result			#
10365 #	res_qnan() - return QNAN result					#
10366 #	res_snan() - return SNAN result					#
10367 #									#
10368 # INPUT ***************************************************************	#
10369 #	a0 = pointer to extended precision source operand		#
10370 #	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
10372 #									#
10373 # OUTPUT **************************************************************	#
10374 #	fp0 = result							#
10375 #	fp1 = EXOP (if exception occurred)				#
10376 #									#
10377 # ALGORITHM ***********************************************************	#
10378 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
10379 # norms/denorms into ext/sgl/dbl precision.				#
10380 #	For norms/denorms, scale the exponents such that a multiply	#
10381 # instruction won't cause an exception. Use the regular fsglmul to	#
10382 # compute a result. Check if the regular operands would have taken	#
10383 # an exception. If so, return the default overflow/underflow result	#
10384 # and return the EXOP if exceptions are enabled. Else, scale the	#
10385 # result operand to the proper exponent.				#
10386 #									#
10387 #########################################################################
10388 
10389 	global		fsglmul
# fsglmul: entry point. the rnd prec/mode is parked in L_SCR3(%a6) and
# d1 is built as (DTAG << 3) | STAG; zero means both operands are NORMs
# and falls through into fsglmul_norm.
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1

	bne.w		fsglmul_not_norm	# optimize on non-norm input

# fsglmul_norm: copy dst to FP_SCR1 and src to FP_SCR0, then call both
# scaling routines. the first scale factor is kept on the stack across
# the second call and the two are summed into d0, which stays the
# combined scale factor for every path below.
#   d0 <  0x3fff-0x7ffe : result will overflow
#   d0 == 0x3fff-0x7ffe : result may overflow, depending on rounding
#   d0 >  0x3fff+0x0001 : result will underflow
#   d0 == 0x3fff+0x0001 : result may underflow, depending on rounding
# otherwise fall through into fsglmul_normal.
fsglmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
	bgt.w		fsglmul_unfl		# result will underflow
10423 
# fsglmul_normal: no exception is possible. multiply the scaled operands
# (dst from FP_SCR1 in fp0, src from FP_SCR0) with the user's rnd
# prec/mode and OR the resulting FPSR into USER_FPSR(%a6). d0 (combined
# scale factor) is left untouched.
fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# fsglmul_normal_exit: undo the scaling. fp0 is written out to FP_SCR0,
# the scale factor in d0 is subtracted from its 15-bit exponent, the sign
# bit is kept, and the value is reloaded into fp0. d2 is used as a
# temporary and is preserved on the stack. also entered from the
# may_ovfl/may_unfl paths when no exception occurred.
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10450 
# fsglmul_ovfl: the result will overflow. the multiply is still executed
# on the scaled operands with the user's rnd prec/mode so that the FPSR
# bits it produces can be OR'ed into USER_FPSR(%a6); the scaled product
# is left in fp0 for fsglmul_ovfl_ena.
fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# fsglmul_ovfl_tst: common overflow tail. falls through into
# fsglmul_ovfl_dis when neither OVFL nor INEX is enabled.
fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes
10472 
# fsglmul_ovfl_dis: produce the default overflow result via ovf_res().
# passed: d1 = sign flag (0xff when the 'N' ccode bit is set, else 0x00)
#         d0 = rounding mode only; the precision field is masked off so
#              the default result is generated for extended precision
# used on return: d0 = ccode bits to OR in, a0 = ptr to the result.
# fsglmul_ovfl_ena rejoins here after building the EXOP.
fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts
10482 
# fsglmul_ovfl_ena: build the EXOP from the scaled product in fp0 and
# return it in fp1. the product is written to FP_SCR0 and its exponent
# becomes (exp - scale factor - 0x6000) masked to 15 bits, with d0 still
# holding the combined scale factor; the sign bit is kept. d2 is used as
# a temporary and is preserved on the stack.
fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# wrap to a 15-bit exponent
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis
10499 
# fsglmul_may_ovfl: the scale factor sits exactly on the overflow limit,
# so whether the result overflows depends on the scaled product. the
# multiply is executed with the user's rnd prec/mode, the FPSR is OR'ed
# into USER_FPSR(%a6), and |result| is compared against 2:
#   >= 2 : overflow; join the common tail at fsglmul_ovfl_tst
#   <  2 : no overflow; unscale and return via fsglmul_normal_exit
# fp1 is used as a scratch register for the compare.
fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit
10519 
# fsglmul_unfl: the result will underflow. the UNFL exception bit is set
# and the scaled operands are multiplied with the FPCR set to round-to-
# zero (rz_mode) rather than the user's mode. the product is left in fp0
# for fsglmul_unfl_dis and the FPSR is OR'ed into USER_FPSR(%a6).
# falls through into fsglmul_unfl_dis when neither UNFL nor INEX is
# enabled.
fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes
10538 
# fsglmul_unfl_dis: produce the default underflow result. the product in
# fp0 is written to FP_SCR0 and unf_res4() is given a0 = FP_SCR0 and
# d1 = rnd prec/mode; the byte it returns in d0 is OR'ed into the
# condition codes and FP_SCR0 is then loaded as the result.
# fsglmul_unfl_ena rejoins here after building the EXOP in fp1.
fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
10548 
10549 #
10550 # UNFL is enabled.
10551 #
# fsglmul_unfl_ena: build the EXOP in fp1. the multiply is redone in fp1
# with the user's rnd prec/mode (fp0 keeps the round-to-zero product
# from fsglmul_unfl). the FPSR of this second multiply is not read back.
# the product is written to FP_SCR0 and its exponent becomes
# (exp - scale factor + 0x6000) masked to 15 bits, with d0 still holding
# the combined scale factor; the sign bit is kept. d2 is used as a
# temporary and is preserved on the stack. FP_SCR0 is overwritten again
# by fsglmul_unfl_dis once the EXOP has been loaded into fp1.
fsglmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# wrap to a 15-bit exponent
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsglmul_unfl_dis
10576 
#
# fsglmul_may_unfl: the scaled multiply may or may not have underflowed.
# Run the multiply with the caller's rounding info and compare |result|
# against 2:
#   |result| > 2  -> no underflow; finish at fsglmul_normal_exit
#   |result| < 2  -> underflow; go to fsglmul_unfl
#   |result| = 2  -> undecided; redo with RZ (see below)
# fp0 = result; fp1 and d1 are scratch.
#
fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
	fbgt.w		fsglmul_normal_exit	# |result| > 2; no underflow occurred
	fblt.w		fsglmul_unfl		# |result| < 2; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred
10618 
10619 ##############################################################################
10620 
10621 #
10622 # Single Precision Multiply: inputs are not both normalized; what are they?
10623 #
#
# fsglmul_not_norm: dispatch on the operand types through tbl_fsglmul_op.
# d1.w indexes a table of 16-bit offsets relative to tbl_fsglmul_op; the
# fetched offset is then used for a PC-relative jump.
# NOTE(review): d1 is presumably (DTAG << 3) | STAG, built at the fsglmul
# entry point the same way fsgldiv builds it -- confirm against that code.
# Each row below is one dst type (8 slots: 6 src types + 2 unused pads).
#
fsglmul_not_norm:
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsglmul_op:
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

# local trampolines: the table holds 16-bit offsets, so each target is a
# nearby label that does a long branch to the shared handler.
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst
10696 
10697 #########################################################################
10698 # XDEF ****************************************************************	#
10699 #	fsgldiv(): emulates the fsgldiv instruction			#
10700 #									#
10701 # XREF ****************************************************************	#
10702 #	scale_to_zero_src() - scale src exponent to zero		#
10703 #	scale_to_zero_dst() - scale dst exponent to zero		#
10704 #	unf_res4() - return default underflow result for sglop		#
10705 #	ovf_res() - return default overflow result			#
10706 #	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#	res_operr() - return OPERR result				#
10708 #									#
10709 # INPUT ***************************************************************	#
10710 #	a0 = pointer to extended precision source operand		#
10711 #	a1 = pointer to extended precision destination operand		#
10712 #	d0  rnd prec,mode						#
10713 #									#
10714 # OUTPUT **************************************************************	#
10715 #	fp0 = result							#
10716 #	fp1 = EXOP (if exception occurred)				#
10717 #									#
10718 # ALGORITHM ***********************************************************	#
10719 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
10720 # norms/denorms into ext/sgl/dbl precision.				#
10721 #	For norms/denorms, scale the exponents such that a divide	#
10722 # instruction won't cause an exception. Use the regular fsgldiv to	#
10723 # compute a result. Check if the regular operands would have taken	#
10724 # an exception. If so, return the default overflow/underflow result	#
10725 # and return the EXOP if exceptions are enabled. Else, scale the	#
10726 # result operand to the proper exponent.				#
10727 #									#
10728 #########################################################################
10729 
	global		fsgldiv
fsgldiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

# build the dispatch index: d1 = (DTAG << 3) | STAG. a zero index is the
# NORM / NORM entry of tbl_fsgldiv_op and falls through to fsgldiv_norm.
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsgldiv_not_norm	# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
# copy both operands to scratch (dst -> FP_SCR1, src -> FP_SCR0), scale
# them, and use the combined scale factor in d0 to pick the normal,
# may-overflow, may-underflow or underflow path.
#
fsgldiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale2 - scale1
	add.l		%d0,(%sp)

# NOTE(review): d1 is loaded with the rounding precision here but every
# path visible in this routine overwrites d1 before reading it.
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1
	mov.l		(%sp)+,%d0		# d0 = combined scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# will result overflow?
	ble.w		fsgldiv_may_ovfl	# maybe

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# common exit for results that neither overflowed nor underflowed:
# put the scaled result's exponent back using the scale factor in d0,
# keeping the sign, and return the result in fp0.
#
fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10797 
#
# fsgldiv_may_ovfl: the divide may or may not overflow. Do the scaled
# divide, then rebuild the true exponent (scaled exponent - scale factor
# in d0) and compare it with 0x7fff.
#   no overflow -> fsgldiv_normal_exit (result still in fp0)
#   overflow    -> fall into fsgldiv_ovfl_tst
#
fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1
	fmov.l		&0x0,%fpcr

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

# only the {sgn,exp} word is needed, so the 12-byte image is popped
# again straight after the exponent is read.
	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit

#
# overflow: flag it, and build an EXOP first if OVFL or INEX is enabled.
#
fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

#
# return the default overflow result from ovf_res() in fp0.
# d1 = sign parameter (set from the N bit), d0 = rounding mode only.
#
fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL or INEX is enabled: form the EXOP in fp1 from the result in fp0 by
# undoing the scaling and subtracting 0x6000 from the exponent, then
# finish through fsgldiv_ovfl_dis. d0 still holds the scale factor here.
#
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis
10852 
#
# fsgldiv_unfl: the divide underflowed.
#   - set the UNFL exception bit
#   - redo the divide using RZ and fold the status into USER_FPSR
#   - if UNFL or INEX is enabled, build the EXOP in fp1 (fsgldiv_unfl_ena)
#   - return the default underflow result from unf_res4() in fp0
# d0 holds the scale factor on entry; d1 is scratch.
#
fsgldiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

#
# build the default result. also the common tail for fsgldiv_unfl_ena,
# which branches back here once the EXOP is in fp1.
#
fsgldiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL or INEX is enabled: redo the divide into fp1 using the caller's
# rounding info, then rebias the exponent to form the EXOP.
# fp0 (the RZ result) is left untouched for fsgldiv_unfl_dis.
#
fsgldiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis
10909 
10910 #
10911 # the divide operation MAY underflow:
10912 #
10913 fsgldiv_may_unfl:
10914 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10915 
10916 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10917 	fmov.l		&0x0,%fpsr		# clear FPSR
10918 
10919 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
10920 
10921 	fmov.l		%fpsr,%d1		# save status
10922 	fmov.l		&0x0,%fpcr		# clear FPCR
10923 
10924 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10925 
10926 	fabs.x		%fp0,%fp1		# make a copy of result
10927 	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
10928 	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
10929 	fblt.w		fsgldiv_unfl		# yes; underflow occurred
10930 
10931 #
10932 # we still don't know if underflow occurred. result is ~ equal to 1. but,
10933 # we don't know if the result was an underflow that rounded up to a 1
10934 # or a normalized number that rounded down to a 1. so, redo the entire
10935 # operation using RZ as the rounding mode to see what the pre-rounded
10936 # result is. this case should be relatively rare.
10937 #
10938 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
10939 
10940 	clr.l		%d1			# clear scratch register
10941 	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
10942 
10943 	fmov.l		%d1,%fpcr		# set FPCR
10944 	fmov.l		&0x0,%fpsr		# clear FPSR
10945 
10946 	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
10947 
10948 	fmov.l		&0x0,%fpcr		# clear FPCR
10949 	fabs.x		%fp1			# make absolute value
10950 	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
10951 	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
10952 	bra.w		fsgldiv_unfl		# yes; underflow occurred
10953 
10954 ############################################################################
10955 
10956 #
10957 # Divide: inputs are not both normalized; what are they?
10958 #
#
# fsgldiv_not_norm: dispatch on the operand types through tbl_fsgldiv_op.
# d1 = (DTAG << 3) | STAG as built at fsgldiv. it indexes a table of
# 16-bit offsets relative to tbl_fsgldiv_op; the fetched offset is then
# used for a PC-relative jump.
# Each row below is one dst type (8 slots: 6 src types + 2 unused pads);
# the comments read "dst / src".
#
fsgldiv_not_norm:
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsgldiv_op:
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

# local trampolines: the table holds 16-bit offsets, so each target is a
# nearby label that does a long branch to the shared handler.
fsgldiv_res_qnan:
	bra.l		res_qnan
fsgldiv_res_snan:
	bra.l		res_snan
fsgldiv_res_operr:
	bra.l		res_operr
fsgldiv_inf_load:
	bra.l		fdiv_inf_load
fsgldiv_zero_load:
	bra.l		fdiv_zero_load
fsgldiv_inf_dst:
	bra.l		fdiv_inf_dst
11031 
11032 #########################################################################
11033 # XDEF ****************************************************************	#
11034 #	fadd(): emulates the fadd instruction				#
#	fsadd(): emulates the fsadd instruction				#
11036 #	fdadd(): emulates the fdadd instruction				#
11037 #									#
11038 # XREF ****************************************************************	#
11039 #	addsub_scaler2() - scale the operands so they won't take exc	#
11040 #	ovf_res() - return default overflow result			#
11041 #	unf_res() - return default underflow result			#
11042 #	res_qnan() - set QNAN result					#
11043 #	res_snan() - set SNAN result					#
11044 #	res_operr() - set OPERR result					#
11045 #	scale_to_zero_src() - set src operand exponent equal to zero	#
11046 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11047 #									#
11048 # INPUT ***************************************************************	#
11049 #	a0 = pointer to extended precision source operand		#
11050 #	a1 = pointer to extended precision destination operand		#
11051 #									#
11052 # OUTPUT **************************************************************	#
11053 #	fp0 = result							#
11054 #	fp1 = EXOP (if exception occurred)				#
11055 #									#
11056 # ALGORITHM ***********************************************************	#
11057 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
11058 # norms into extended, single, and double precision.			#
11059 #	Do addition after scaling exponents such that exception won't	#
11060 # occur. Then, check result exponent to see if exception would have	#
11061 # occurred. If so, return default result and maybe EXOP. Else, insert	#
11062 # the correct result exponent and return. Set FPSR bits as appropriate.	#
11063 #									#
11064 #########################################################################
11065 
	global		fsadd
# fsadd: force single precision in the rounding info, then join fadd.
fsadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fadd

	global		fdadd
# fdadd: force double precision in the rounding info, then fall into fadd.
fdadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fadd
# fadd: d0 = rnd prec,mode. a0/a1 = src/dst operand pointers.
fadd:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

# build the dispatch index: d1 = (DTAG << 3) | STAG. a zero index is the
# NORM + NORM entry of tbl_fadd_op and falls through to fadd_norm.
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fadd_not_norm		# optimize on non-norm input

#
# ADD: norms and denorms
#
fadd_norm:
	bsr.l		addsub_scaler2		# scale exponents

#
# fadd_zero_entry: also entered from fadd_zero_dst/fadd_zero_src with the
# operands already in FP_SCR0/FP_SCR1 and the scale factor in d0.
#
fadd_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fadd_zero_exit		# if result is zero, end now

# from here until the final rts the stack holds, from (%sp) upward, the
# 12-byte result image followed by the saved d2. the overflow/underflow
# paths rely on this layout.
	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

# d1 = rounding precision, used to index the threshold tables.
# NOTE(review): this assumes bits 15-8 of L_SCR3 are zero -- confirm.
	mov.w		2+L_SCR3(%a6),%d1
	lsr.b		&0x6,%d1

	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor

	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fadd_ovfl		# yes

	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fadd_unfl		# yes
	beq.w		fadd_may_unfl		# maybe; go find out

#
# fadd_normal: no exception. insert the unscaled exponent (d2) into the
# saved result, reload it into fp0 and restore d2.
#
fadd_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

# result was zero: fp0 already holds it and nothing was pushed, so
# simply return.
fadd_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts
11141 
# overflow thresholds, indexed by rounding precision (ext, sgl, dbl).
# fadd treats an unscaled result exponent >= the entry as an overflow.
tbl_fadd_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

# underflow thresholds, indexed the same way. an unscaled result exponent
# below the entry is an underflow; equal to it is a "maybe"
# (see fadd_may_unfl).
tbl_fadd_unfl:
	long	        0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl
11151 
#
# fadd_ovfl: the add overflowed in the selected rounding precision.
#   on entry: (%sp)    = 12-byte scaled result image
#             0xc(%sp) = saved d2
#             d0 = scale factor, d2 = unscaled result exponent
# Sets ovfl/aovfl/ainex, returns the default overflow result from
# ovf_res() in fp0 and, if OVFL or INEX is enabled, the EXOP in fp1.
#
fadd_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fadd_ovfl_ena		# yes

	add.l		&0xc,%sp		# no EXOP wanted; drop saved result
fadd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# OVFL or INEX is enabled: build the EXOP.
# the rnd prec,mode byte is the LOW byte of the L_SCR3 longword, i.e.
# 3+L_SCR3 on this big-endian machine. reading L_SCR3+0 would test the
# most-significant byte instead and the sgl/dbl path would be skipped.
#
fadd_ovfl_ena:
	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl

#
# (%sp) = extended precision result image, d2 = its unscaled exponent.
# subtract 0x6000 from the exponent, keep the sign, and return it in fp1.
#
fadd_ovfl_ena_cont:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# add extra bias
	andi.w		&0x7fff,%d2
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fadd_ovfl_dis

#
# precision is sgl or dbl: redo the add in extended precision (rounding
# mode only in the FPCR) so the EXOP carries the extended result, and
# replace the saved result image with it.
#
fadd_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# discard sgl/dbl rounded result
	fmovm.x		&0x01,-(%sp)		# save extended result in its place

# the extended result's exponent can differ from the sgl/dbl rounded one
# (rounding may have carried into the next exponent), so recompute d2
# from the new image rather than reusing the stale value.
	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor
	bra.b		fadd_ovfl_ena_cont
11200 
#
# fadd_unfl: the add underflowed.
#   on entry: (%sp) = 12-byte result image, 0xc(%sp) = saved d2,
#             d0 = scale factor.
#   - set the UNFL exception bit and drop the saved result image
#   - redo the add using RZ and fold the status into USER_FPSR
#   - if UNFL or INEX is enabled, build the EXOP in fp1 (fadd_unfl_ena)
#   - return the default underflow result from unf_res() in fp0
#
fadd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# discard saved result image

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fadd_unfl_ena		# yes

#
# build the default result. also the common tail for the enabled path,
# which branches back here once the EXOP is in fp1.
#
fadd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# UNFL or INEX is enabled: redo the add into fp1 to form the EXOP.
# extended precision uses the caller's FPCR setting as is; sgl/dbl use
# the rounding mode only (see fadd_unfl_ena_sd).
#
fadd_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fadd_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

# d2 is free to use as scratch here: the caller's value is still saved
# on the stack and is restored in fadd_unfl_dis.
	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fadd_unfl_dis

# sgl or dbl precision: load the FPCR with the rounding mode only, then
# rejoin the common EXOP code.
fadd_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fadd_unfl_ena_cont
11268 
11269 #
11270 # result is equal to the smallest normalized number in the selected precision
11271 # if the precision is extended, this result could not have come from an
11272 # underflow that rounded up.
11273 #
11274 fadd_may_unfl:
11275 	mov.l		L_SCR3(%a6),%d1
11276 	andi.b		&0xc0,%d1
11277 	beq.w		fadd_normal		# yes; no underflow occurred
11278 
11279 	mov.l		0x4(%sp),%d1		# extract hi(man)
11280 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11281 	bne.w		fadd_normal		# no; no underflow occurred
11282 
11283 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11284 	bne.w		fadd_normal		# no; no underflow occurred
11285 
11286 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287 	beq.w		fadd_normal		# no; no underflow occurred
11288 
11289 #
11290 # ok, so now the result has a exponent equal to the smallest normalized
11291 # exponent for the selected precision. also, the mantissa is equal to
11292 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293 # g,r,s.
11294 # now, we must determine whether the pre-rounded result was an underflow
11295 # rounded "up" or a normalized number rounded "down".
11296 # so, we do this be re-executing the add using RZ as the rounding mode and
11297 # seeing if the new result is smaller or equal to the current result.
11298 #
11299 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11300 
11301 	mov.l		L_SCR3(%a6),%d1
11302 	andi.b		&0xc0,%d1		# keep rnd prec
11303 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11304 	fmov.l		%d1,%fpcr		# set FPCR
11305 	fmov.l		&0x0,%fpsr		# clear FPSR
11306 
11307 	fadd.x		FP_SCR0(%a6),%fp1	# execute add
11308 
11309 	fmov.l		&0x0,%fpcr		# clear FPCR
11310 
11311 	fabs.x		%fp0			# compare absolute values
11312 	fabs.x		%fp1
11313 	fcmp.x		%fp0,%fp1		# is first result > second?
11314 
11315 	fbgt.w		fadd_unfl		# yes; it's an underflow
11316 	bra.w		fadd_normal		# no; it's not an underflow
11317 
11318 ##########################################################################
11319 
11320 #
11321 # Add: inputs are not both normalized; what are they?
11322 #
#
# fadd_not_norm: dispatch on the operand types through tbl_fadd_op.
# d1 = (DTAG << 3) | STAG as built at fadd. it indexes a table of 16-bit
# offsets relative to tbl_fadd_op; the fetched offset is then used for a
# PC-relative jump.
# Each row below is one dst type (8 slots: 6 src types + 2 unused pads);
# the comments read "dst + src".
#
fadd_not_norm:
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fadd_op:
	short		fadd_norm	- tbl_fadd_op # NORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
	short		fadd_inf_2	- tbl_fadd_op # INF + INF
	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

# local trampolines: the table holds 16-bit offsets, so each target is a
# nearby label that does a long branch to the shared handler.
fadd_res_qnan:
	bra.l		res_qnan
fadd_res_snan:
	bra.l		res_snan
11387 
11388 #
11389 # both operands are ZEROes
11390 #
#
# fadd_zero_2: ZERO + ZERO. returns a signed zero in fp0 and writes the
# matching condition codes (Z, plus N for -ZERO) to FPSR_CC.
# d0/d1 are used as scratch for the operand sign bytes.
#
fadd_zero_2:
	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1			# bit 7 set if signs differ
	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)

# the signs are the same. so determine whether they are positive or negative
# and return the appropriately signed zero.
	tst.b		%d0			# are ZEROes positive or negative?
	bmi.b		fadd_zero_rm		# negative
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have opposite signs:
# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
# - -ZERO is returned in the case of RM.
#
fadd_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1	# low byte = rnd prec,mode
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
	beq.b		fadd_zero_rm		# yes
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

# return -ZERO. reached for two negative zeroes, and for zeroes of
# opposite sign when the rounding mode is RM.
fadd_zero_rm:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
	rts
11423 
11424 #
11425 # one operand is a ZERO and the other is a DENORM or NORM. scale
11426 # the DENORM or NORM and jump to the regular fadd routine.
11427 # the ZERO operand is replaced by an all-zero value in its scratch slot.
11428 fadd_zero_dst:				# dst is the ZERO; src is the DENORM/NORM
11429 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
11430 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11431 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11432 	bsr.l		scale_to_zero_src	# scale the operand
11433 	clr.w		FP_SCR1_EX(%a6)		# FP_SCR1 (dst slot) = all zeroes
11434 	clr.l		FP_SCR1_HI(%a6)
11435 	clr.l		FP_SCR1_LO(%a6)
11436 	bra.w		fadd_zero_entry		# go execute fadd
11437 
11438 fadd_zero_src:				# src is the ZERO; dst is the DENORM/NORM
11439 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand to FP_SCR1
11440 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11441 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11442 	bsr.l		scale_to_zero_dst	# scale the operand
11443 	clr.w		FP_SCR0_EX(%a6)		# FP_SCR0 (src slot) = all zeroes
11444 	clr.l		FP_SCR0_HI(%a6)
11445 	clr.l		FP_SCR0_LO(%a6)
11446 	bra.w		fadd_zero_entry		# go execute fadd
11447 
11448 #
11449 # both operands are INFs. an OPERR will result if the INFs have
11450 # different signs. else, an INF of the same sign is returned
11451 # (execution falls through into fadd_inf_src to return the src INF).
11452 fadd_inf_2:
11453 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11454 	mov.b		DST_EX(%a1),%d1
11455 	eor.b		%d1,%d0			# N is set if the sign bits differ
11456 	bmi.l		res_operr		# weed out (-INF)+(+INF)
11457 
11458 # ok, so it's not an OPERR. but, we do have to remember to return the
11459 # src INF since that's where the 881/882 gets the j-bit from...
11460 
11461 #
11462 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11463 # (src operand is the INF)
11464 fadd_inf_src:
11465 	fmovm.x		SRC(%a0),&0x80		# return src INF
11466 	tst.b		SRC_EX(%a0)		# is INF positive?
11467 	bpl.b		fadd_inf_done		# yes; we're done
11468 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469 	rts
11470 
11471 #
11472 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11473 # (dst operand is the INF)
11474 fadd_inf_dst:
11475 	fmovm.x		DST(%a1),&0x80		# return dst INF
11476 	tst.b		DST_EX(%a1)		# is INF positive?
11477 	bpl.b		fadd_inf_done		# yes; we're done
11478 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479 	rts
11480 
11481 fadd_inf_done:				# shared exit for a positive INF result
11482 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
11483 	rts
11484 
11485 #########################################################################
11486 # XDEF ****************************************************************	#
11487 #	fsub(): emulates the fsub instruction				#
11488 #	fssub(): emulates the fssub instruction				#
11489 #	fdsub(): emulates the fdsub instruction				#
11490 #									#
11491 # XREF ****************************************************************	#
11492 #	addsub_scaler2() - scale the operands so they won't take exc	#
11493 #	ovf_res() - return default overflow result			#
11494 #	unf_res() - return default underflow result			#
11495 #	res_qnan() - set QNAN result					#
11496 #	res_snan() - set SNAN result					#
11497 #	res_operr() - set OPERR result					#
11498 #	scale_to_zero_src() - set src operand exponent equal to zero	#
11499 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11500 #									#
11501 # INPUT ***************************************************************	#
11502 #	a0 = pointer to extended precision source operand		#
11503 #	a1 = pointer to extended precision destination operand		#
11504 #									#
11505 # OUTPUT **************************************************************	#
11506 #	fp0 = result							#
11507 #	fp1 = EXOP (if exception occurred)				#
11508 #									#
11509 # ALGORITHM ***********************************************************	#
11510 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
11511 # norms into extended, single, and double precision.			#
11512 #	Do subtraction after scaling exponents such that exception won't#
11513 # occur. Then, check result exponent to see if exception would have	#
11514 # occurred. If so, return default result and maybe EXOP. Else, insert	#
11515 # the correct result exponent and return. Set FPSR bits as appropriate.	#
11516 #									#
11517 #########################################################################
11518 
11519 	global		fssub
11520 fssub:					# fssub entry: force single precision
11521 	andi.b		&0x30,%d0		# clear rnd prec
11522 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11523 	bra.b		fsub
11524 
11525 	global		fdsub
11526 fdsub:					# fdsub entry: force double precision, fall into fsub
11527 	andi.b		&0x30,%d0		# clear rnd prec
11528 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11529 
11530 	global		fsub
11531 fsub:
11532 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11533 
11534 	clr.w		%d1
11535 	mov.b		DTAG(%a6),%d1
11536 	lsl.b		&0x3,%d1		# d1 = DTAG * 8
11537 	or.b		STAG(%a6),%d1		# combine src tags
11538 
11539 	bne.w		fsub_not_norm		# optimize on non-norm input
11540 
11541 #
11542 # SUB: norms and denorms
11543 #
11544 fsub_norm:
11545 	bsr.l		addsub_scaler2		# scale exponents
11546 
11547 fsub_zero_entry:			# also entered from fsub_zero_dst/fsub_zero_src
11548 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11549 
11550 	fmov.l		&0x0,%fpsr		# clear FPSR
11551 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11552 
11553 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11554 
11555 	fmov.l		&0x0,%fpcr		# clear FPCR
11556 	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
11557 
11558 	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
11559 
11560 	fbeq.w		fsub_zero_exit		# if result zero, end now
11561 
11562 	mov.l		%d2,-(%sp)		# save d2
11563 
11564 	fmovm.x		&0x01,-(%sp)		# save result to stack
11565 
11566 	mov.w		2+L_SCR3(%a6),%d1	# low word of the saved rnd info
11567 	lsr.b		&0x6,%d1		# d1 = rnd prec, used as table index
11568 
11569 	mov.w		(%sp),%d2		# fetch new exponent
11570 	andi.l		&0x7fff,%d2		# strip sign
11571 	sub.l		%d0,%d2			# subtract scale factor (d0)
11572 
11573 	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574 	bge.b		fsub_ovfl		# yes
11575 
11576 	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577 	blt.w		fsub_unfl		# yes
11578 	beq.w		fsub_may_unfl		# maybe; go find out
11579 
11580 fsub_normal:				# no exception: patch exponent and return
11581 	mov.w		(%sp),%d1
11582 	andi.w		&0x8000,%d1		# keep sign
11583 	or.w		%d2,%d1			# insert new exponent
11584 	mov.w		%d1,(%sp)		# write {sgn,exp} back to the stacked result
11585 
11586 	fmovm.x		(%sp)+,&0x80		# return result in fp0
11587 
11588 	mov.l		(%sp)+,%d2		# restore d2
11589 	rts
11590 
11591 fsub_zero_exit:				# zero result: fp0 and saved ccodes are already final
11592 #	fmov.s		&0x00000000,%fp0	# return zero in fp0
11593 	rts
11594 
11595 tbl_fsub_ovfl:				# overflow thresholds, indexed by rnd prec
11596 	long		0x7fff			# ext ovfl
11597 	long		0x407f			# sgl ovfl
11598 	long		0x43ff			# dbl ovfl
11599 
11600 tbl_fsub_unfl:				# underflow thresholds, indexed by rnd prec
11601 	long	        0x0000			# ext unfl
11602 	long		0x3f81			# sgl unfl
11603 	long		0x3c01			# dbl unfl
11604 
11605 fsub_ovfl:				# entered with d2 and the 12-byte result still stacked
11606 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607 
11608 	mov.b		FPCR_ENABLE(%a6),%d1
11609 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11610 	bne.b		fsub_ovfl_ena		# yes
11611 
11612 	add.l		&0xc,%sp		# discard the stacked result
11613 fsub_ovfl_dis:				# build the default overflow result
11614 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11615 	sne		%d1			# set sign param accordingly
11616 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11617 	bsr.l		ovf_res			# calculate default result
11618 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11619 	fmovm.x		(%a0),&0x80		# return default result in fp0
11620 	mov.l		(%sp)+,%d2		# restore d2
11621 	rts
11622 
#
# OVFL or INEX is enabled: build the EXOP in fp1 before producing the
# default result. on entry the rounded result (12 bytes) and the saved d2
# are on the stack, d0 holds the scale factor and d2 the unscaled exponent.
#
# the rnd prec,mode byte is the LOW byte of L_SCR3 (see "3+L_SCR3" and the
# "mov.l L_SCR3" tests elsewhere in fsub). reading byte 0 of the longword
# made the precision test below always see "extended", so the sgl/dbl path
# (fsub_ovfl_ena_sd) could never be taken.
#
11623 fsub_ovfl_ena:
11624 	mov.l		L_SCR3(%a6),%d1		# low byte = rnd prec,mode
11625 	andi.b		&0xc0,%d1		# is precision extended?
11626 	bne.b		fsub_ovfl_ena_sd	# no
11627 
11628 fsub_ovfl_ena_cont:
11629 	mov.w		(%sp),%d1		# fetch {sgn,exp}
11630 	andi.w		&0x8000,%d1		# keep sign
11631 	subi.l		&0x6000,%d2		# subtract new bias
11632 	andi.w		&0x7fff,%d2		# clear top bit
11633 	or.w		%d2,%d1			# concat sign,exp
11634 	mov.w		%d1,(%sp)		# insert new exponent
11635 
11636 	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11637 	bra.b		fsub_ovfl_dis
11638 
#
# sgl/dbl precision: redo the subtract with the user's rounding mode but
# extended precision, and replace the stacked result with it.
#
11639 fsub_ovfl_ena_sd:
11640 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11641 
11642 	mov.l		L_SCR3(%a6),%d1
11643 	andi.b		&0x30,%d1		# clear rnd prec
11644 	fmov.l		%d1,%fpcr		# set FPCR
11645 
11646 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11647 
11648 	fmov.l		&0x0,%fpcr		# clear FPCR
11649 
11650 	add.l		&0xc,%sp		# drop the sgl/dbl-rounded result
11651 	fmovm.x		&0x01,-(%sp)		# stack the extended-rounded result
# d2 was derived from the sgl/dbl-rounded result; a rounding carry there can
# make its exponent differ from the extended-rounded one, so recompute it.
	mov.w		(%sp),%d2		# fetch new exponent
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# subtract scale factor (d0)
11652 	bra.b		fsub_ovfl_ena_cont
11653 
11654 fsub_unfl:				# entered with d2 and the 12-byte result still stacked
11655 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656 
11657 	add.l		&0xc,%sp		# discard the stacked result
11658 
11659 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11660 
11661 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11662 	fmov.l		&0x0,%fpsr		# clear FPSR
11663 
11664 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11665 
11666 	fmov.l		&0x0,%fpcr		# clear FPCR
11667 	fmov.l		%fpsr,%d1		# save status
11668 
11669 	or.l		%d1,USER_FPSR(%a6)	# merge status into the saved FPSR
11670 
11671 	mov.b		FPCR_ENABLE(%a6),%d1
11672 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11673 	bne.b		fsub_unfl_ena		# yes
11674 
11675 fsub_unfl_dis:				# build the default underflow result
11676 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11677 
11678 	lea		FP_SCR0(%a6),%a0	# pass: result addr
11679 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11680 	bsr.l		unf_res			# calculate default result
11681 	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
11682 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11683 	mov.l		(%sp)+,%d2		# restore d2
11684 	rts
11685 
11686 fsub_unfl_ena:				# UNFL or INEX enabled: compute the EXOP in fp1 first
11687 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11688 
11689 	mov.l		L_SCR3(%a6),%d1
11690 	andi.b		&0xc0,%d1		# is precision extended?
11691 	bne.b		fsub_unfl_ena_sd	# no
11692 
11693 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11694 
11695 fsub_unfl_ena_cont:
11696 	fmov.l		&0x0,%fpsr		# clear FPSR
11697 
11698 	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11699 
11700 	fmov.l		&0x0,%fpcr		# clear FPCR
11701 
11702 	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
11703 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11704 	mov.l		%d1,%d2			# make a copy
11705 	andi.l		&0x7fff,%d1		# strip sign
11706 	andi.w		&0x8000,%d2		# keep old sign
11707 	sub.l		%d0,%d1			# subtract scale factor (d0)
11708 	addi.l		&0x6000,%d1		# add new bias
11709 	andi.w		&0x7fff,%d1		# clear top bit
11710 	or.w		%d2,%d1			# concat sgn,exp
11711 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11712 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11713 	bra.w		fsub_unfl_dis
11714 
11715 fsub_unfl_ena_sd:			# sgl/dbl: use the user's rnd mode with extended precision
11716 	mov.l		L_SCR3(%a6),%d1
11717 	andi.b		&0x30,%d1		# clear rnd prec
11718 	fmov.l		%d1,%fpcr		# set FPCR
11719 
11720 	bra.b		fsub_unfl_ena_cont
11721 
11722 #
11723 # result is equal to the smallest normalized number in the selected precision
11724 # if the precision is extended, this result could not have come from an
11725 # underflow that rounded up.
11726 #
11727 fsub_may_unfl:
11728 	mov.l		L_SCR3(%a6),%d1
11729 	andi.b		&0xc0,%d1		# fetch rnd prec
11730 	beq.w		fsub_normal		# extended; no underflow occurred
11731 
11732 	mov.l		0x4(%sp),%d1		# hi(man) of the stacked result
11733 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11734 	bne.w		fsub_normal		# no; no underflow occurred
11735 
11736 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11737 	bne.w		fsub_normal		# no; no underflow occurred
11738 
11739 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740 	beq.w		fsub_normal		# no; no underflow occurred
11741 
11742 #
11743 # ok, so now the result has an exponent equal to the smallest normalized
11744 # exponent for the selected precision. also, the mantissa is equal to
11745 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746 # g,r,s.
11747 # now, we must determine whether the pre-rounded result was an underflow
11748 # rounded "up" or a normalized number rounded "down".
11749 # so, we do this by re-executing the subtract using RZ as the rounding mode
11750 # and seeing if the new result is smaller or equal to the current result.
11751 #
11752 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11753 
11754 	mov.l		L_SCR3(%a6),%d1
11755 	andi.b		&0xc0,%d1		# keep rnd prec
11756 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11757 	fmov.l		%d1,%fpcr		# set FPCR
11758 	fmov.l		&0x0,%fpsr		# clear FPSR
11759 
11760 	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11761 
11762 	fmov.l		&0x0,%fpcr		# clear FPCR
11763 
11764 	fabs.x		%fp0			# compare absolute values
11765 	fabs.x		%fp1
11766 	fcmp.x		%fp0,%fp1		# is first result > second?
11767 
11768 	fbgt.w		fsub_unfl		# yes; it's an underflow
11769 	bra.w		fsub_normal		# no; it's not an underflow
11770 
11771 ##########################################################################
11772 
11773 #
11774 # Sub: inputs are not both normalized; what are they?
11775 # d1 = (DTAG << 3) | STAG selects a 16-bit offset relative to tbl_fsub_op.
11776 fsub_not_norm:
11777 	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
11778 	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)	# jump to tbl_fsub_op + offset
11779 
11780 	swbeg		&48
11781 tbl_fsub_op:				# rows = dst tag, columns = src tag ("dst - src")
11782 	short		fsub_norm	- tbl_fsub_op # NORM - NORM
11783 	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
11784 	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
11785 	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11786 	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
11787 	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11788 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11789 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11790 
11791 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
11792 	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
11793 	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
11794 	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
11795 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
11796 	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
11797 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11798 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11799 
11800 	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
11801 	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
11802 	short		fsub_inf_2	- tbl_fsub_op # INF - INF
11803 	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
11804 	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
11805 	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
11806 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11807 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11808 
11809 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
11810 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
11811 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
11812 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
11813 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
11814 	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
11815 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11816 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11817 
11818 	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
11819 	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
11820 	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
11821 	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
11822 	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
11823 	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
11824 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11825 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11826 
11827 	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
11828 	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
11829 	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
11830 	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
11831 	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
11832 	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
11833 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11834 	short		tbl_fsub_op	- tbl_fsub_op # (unused slot)
11835 
11836 fsub_res_qnan:				# tbl_fsub_op target for QNAN cases
11837 	bra.l		res_qnan		# long branch to the shared QNAN result routine
11838 fsub_res_snan:				# tbl_fsub_op target for SNAN cases
11839 	bra.l		res_snan		# long branch to the shared SNAN result routine
11840 
11841 #
11842 # both operands are ZEROes
11843 # (the XOR of the sign bytes lands in d0; d1 keeps the dst sign byte)
11844 fsub_zero_2:
11845 	mov.b		SRC_EX(%a0),%d0
11846 	mov.b		DST_EX(%a1),%d1
11847 	eor.b		%d1,%d0			# N is set if the sign bits differ
11848 	bpl.b		fsub_zero_2_chk_rm	# same signs; result depends on rnd mode
11849 
11850 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# NOTE: d0 was overwritten by the eor above and is always negative on this
# path, so the dst sign must be taken from d1 (testing d0 returned -ZERO
# for (+ZERO) - (-ZERO)).
11851 	tst.b		%d1			# is dst negative?
11852 	bmi.b		fsub_zero_2_rm		# yes
11853 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11854 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11855 	rts
11856 
11857 #
11858 # the ZEROes have the same signs:
11859 # - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860 # - -ZERO is returned in the case of RM.
11861 #
11862 fsub_zero_2_chk_rm:
11863 	mov.b		3+L_SCR3(%a6),%d1	# low byte of the saved rnd info
11864 	andi.b		&0x30,%d1		# extract rnd mode
11865 	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
11866 	beq.b		fsub_zero_2_rm		# yes
11867 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11868 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11869 	rts
11870 
11871 fsub_zero_2_rm:				# shared exit: negative zero result
11872 	fmov.s		&0x80000000,%fp0	# return -ZERO
11873 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
11874 	rts
11875 
11876 #
11877 # one operand is a ZERO and the other is a DENORM or a NORM.
11878 # scale the DENORM or NORM and jump to the regular fsub routine.
11879 # the ZERO operand is replaced by an all-zero value in its scratch slot.
11880 fsub_zero_dst:				# dst is the ZERO; src is the DENORM/NORM
11881 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
11882 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11883 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11884 	bsr.l		scale_to_zero_src	# scale the operand
11885 	clr.w		FP_SCR1_EX(%a6)		# FP_SCR1 (dst slot) = all zeroes
11886 	clr.l		FP_SCR1_HI(%a6)
11887 	clr.l		FP_SCR1_LO(%a6)
11888 	bra.w		fsub_zero_entry		# go execute fsub
11889 
11890 fsub_zero_src:				# src is the ZERO; dst is the DENORM/NORM
11891 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand to FP_SCR1
11892 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11893 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11894 	bsr.l		scale_to_zero_dst	# scale the operand
11895 	clr.w		FP_SCR0_EX(%a6)		# FP_SCR0 (src slot) = all zeroes
11896 	clr.l		FP_SCR0_HI(%a6)
11897 	clr.l		FP_SCR0_LO(%a6)
11898 	bra.w		fsub_zero_entry		# go execute fsub
11899 
11900 #
11901 # both operands are INFs. an OPERR will result if the INFs have the
11902 # same signs. else, the negated src INF is returned (fall into fsub_inf_src).
11903 #
11904 fsub_inf_2:
11905 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11906 	mov.b		DST_EX(%a1),%d1
11907 	eor.b		%d1,%d0			# N is clear if the sign bits match
11908 	bpl.l		res_operr		# weed out (+INF)-(+INF) and (-INF)-(-INF)
11909 
11910 # ok, so it's not an OPERR. but we do have to remember to return
11911 # the src INF since that's where the 881/882 gets the j-bit.
11912 
11913 fsub_inf_src:				# src is INF: result is the src INF with inverted sign
11914 	fmovm.x		SRC(%a0),&0x80		# return src INF
11915 	fneg.x		%fp0			# invert sign
11916 	fbge.w		fsub_inf_done		# sign is now positive
11917 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918 	rts
11919 
11920 fsub_inf_dst:				# dst is INF: result is the dst INF unchanged
11921 	fmovm.x		DST(%a1),&0x80		# return dst INF
11922 	tst.b		DST_EX(%a1)		# is INF negative?
11923 	bpl.b		fsub_inf_done		# no
11924 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925 	rts
11926 
11927 fsub_inf_done:				# shared exit for a positive INF result
11928 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
11929 	rts
11930 
11931 #########################################################################
11932 # XDEF ****************************************************************	#
11933 #	fsqrt(): emulates the fsqrt instruction				#
11934 #	fssqrt(): emulates the fssqrt instruction			#
11935 #	fdsqrt(): emulates the fdsqrt instruction			#
11936 #									#
11937 # XREF ****************************************************************	#
11938 #	scale_sqrt() - scale the source operand				#
11939 #	unf_res() - return default underflow result			#
11940 #	ovf_res() - return default overflow result			#
11941 #	res_qnan_1op() - return QNAN result				#
11942 #	res_snan_1op() - return SNAN result				#
11943 #									#
11944 # INPUT ***************************************************************	#
11945 #	a0 = pointer to extended precision source operand		#
11946 #	d0 = rnd prec,mode						#
11947 #									#
11948 # OUTPUT **************************************************************	#
11949 #	fp0 = result							#
11950 #	fp1 = EXOP (if exception occurred)				#
11951 #									#
11952 # ALGORITHM ***********************************************************	#
11953 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
11954 # norms/denorms into ext/sgl/dbl precision.				#
11955 #	For norms/denorms, scale the exponents such that a sqrt		#
11956 # instruction won't cause an exception. Use the regular fsqrt to	#
11957 # compute a result. Check if the regular operands would have taken	#
11958 # an exception. If so, return the default overflow/underflow result	#
11959 # and return the EXOP if exceptions are enabled. Else, scale the	#
11960 # result operand to the proper exponent.				#
11961 #									#
11962 #########################################################################
11963 
11964 	global		fssqrt
11965 fssqrt:					# fssqrt entry: force single precision
11966 	andi.b		&0x30,%d0		# clear rnd prec
11967 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
11968 	bra.b		fsqrt
11969 
11970 	global		fdsqrt
11971 fdsqrt:					# fdsqrt entry: force double precision, fall into fsqrt
11972 	andi.b		&0x30,%d0		# clear rnd prec
11973 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
11974 
11975 	global		fsqrt
11976 fsqrt:
11977 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11978 	clr.w		%d1
11979 	mov.b		STAG(%a6),%d1		# d1 = src operand tag
11980 	bne.w		fsqrt_not_norm		# optimize on non-norm input
11981 
11982 #
11983 # SQUARE ROOT: norms and denorms ONLY!
11984 # (reached by fall-through from fsqrt when the src tag is zero)
11985 fsqrt_norm:
11986 	tst.b		SRC_EX(%a0)		# is operand negative?
11987 	bmi.l		res_operr		# yes
11988 
11989 	andi.b		&0xc0,%d0		# is precision extended?
11990 	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
11991 
11992 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11993 	fmov.l		&0x0,%fpsr		# clear FPSR
11994 
11995 	fsqrt.x		(%a0),%fp0		# execute square root
11996 
11997 	fmov.l		%fpsr,%d1
# clear FPCR before returning, as every other fsqrt/fsub exit path does, so
# the user's rounding mode does not stay active after this routine.
	fmov.l		&0x0,%fpcr		# clear FPCR
11998 	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
11999 
12000 	rts
12001 
12002 fsqrt_denorm:				# src tag is DENORM (from fsqrt_not_norm)
12003 	tst.b		SRC_EX(%a0)		# is operand negative?
12004 	bmi.l		res_operr		# yes
12005 
12006 	andi.b		&0xc0,%d0		# is precision extended?
12007 	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
12008 
12009 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
12010 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12011 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12012 
12013 	bsr.l		scale_sqrt		# calculate scale factor
12014 
12015 	bra.w		fsqrt_sd_normal		# extended DENORM reuses the scaled path
12016 
12017 #
12018 # operand is either single or double
12019 # (d0 low byte holds only the rnd prec bits at this point)
12020 fsqrt_not_ext:
12021 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12022 	bne.w		fsqrt_dbl
12023 
12024 #
12025 # operand is to be rounded to single precision
12026 #
12027 fsqrt_sgl:
12028 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
12029 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12030 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12031 
12032 	bsr.l		scale_sqrt		# calculate scale factor
12033 
12034 	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
12035 	beq.w		fsqrt_sd_may_unfl	# maybe; go check
12036 	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
12037 	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
12038 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12039 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12040 
12041 #
12042 # operand will NOT overflow or underflow when moved in to the fp reg file
12043 #
12044 fsqrt_sd_normal:
12045 	fmov.l		&0x0,%fpsr		# clear FPSR
12046 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12047 
12048 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12049 
12050 	fmov.l		%fpsr,%d1		# save FPSR
12051 	fmov.l		&0x0,%fpcr		# clear FPCR
12052 
12053 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12054 
12055 fsqrt_sd_normal_exit:			# also entered from fsqrt_sd_may_ovfl
12056 	mov.l		%d2,-(%sp)		# save d2
12057 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12058 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12059 	mov.l		%d1,%d2			# make a copy
12060 	andi.l		&0x7fff,%d1		# strip sign
12061 	sub.l		%d0,%d1			# subtract scale factor (d0)
12062 	andi.w		&0x8000,%d2		# keep old sign
12063 	or.w		%d1,%d2			# concat old sign,new exp
12064 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12065 	mov.l		(%sp)+,%d2		# restore d2
12066 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12067 	rts
12068 
12069 #
12070 # operand is to be rounded to double precision
12071 #
12072 fsqrt_dbl:
12073 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
12074 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12075 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12076 
12077 	bsr.l		scale_sqrt		# calculate scale factor
12078 
12079 	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
12080 	beq.w		fsqrt_sd_may_unfl	# maybe; go check
12081 	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
12082 	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
12083 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12084 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12085 	bra.w		fsqrt_sd_normal		# no; go handle normalized op
12086 
12087 # we're on the line here and the distinguishing characteristic is whether
12088 # the exponent is 3fff or 3ffe. if it's 3fff (bit 0 set), then it's a safe
12089 # number; otherwise fall through to underflow.
12090 fsqrt_sd_may_unfl:
12091 	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12092 	bne.w		fsqrt_sd_normal		# yes, so no underflow
12093 
12094 #
12095 # operand WILL underflow when moved in to the fp register file
12096 #
12097 fsqrt_sd_unfl:
12098 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099 
12100 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
12101 	fmov.l		&0x0,%fpsr		# clear FPSR
12102 
12103 	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
12104 
12105 	fmov.l		%fpsr,%d1		# save status
12106 	fmov.l		&0x0,%fpcr		# clear FPCR
12107 
12108 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12109 
12110 # if underflow or inexact is enabled, go calculate EXOP first.
12111 	mov.b		FPCR_ENABLE(%a6),%d1
12112 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12113 	bne.b		fsqrt_sd_unfl_ena	# yes
12114 
12115 fsqrt_sd_unfl_dis:			# build the default underflow result
12116 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12117 
12118 	lea		FP_SCR0(%a6),%a0	# pass: result addr
12119 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12120 	bsr.l		unf_res			# calculate default result
12121 	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
12122 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12123 	rts
12124 
12125 #
12126 # operand will underflow AND underflow is enabled.
12127 # Therefore, we must return the result rounded to extended precision.
12128 #
# FP_SCR0 still holds the scaled SOURCE operand here, so its mantissa cannot
# be used as the EXOP mantissa (sqrt changes the mantissa, unlike fabs/fneg).
# re-execute the square root with the user's rounding mode and extended
# precision into fp1, then re-bias that result's exponent.
# fp0, d0 (scale factor) and USER_FPSR are left untouched for
# fsqrt_sd_unfl_dis.
#
12129 fsqrt_sd_unfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR
	fsqrt.x		FP_SCR0(%a6),%fp1	# execute square root
	fmov.l		&0x0,%fpcr		# clear FPCR
	fmovm.x		&0x40,FP_SCR1(%a6)	# store extended result
12132 	mov.w		FP_SCR1_EX(%a6),%d1	# load current exponent
12133 
12134 	mov.l		%d2,-(%sp)		# save d2
12135 	mov.l		%d1,%d2			# make a copy
12136 	andi.l		&0x7fff,%d1		# strip sign
12137 	andi.w		&0x8000,%d2		# keep old sign
12138 	sub.l		%d0,%d1			# subtract scale factor
12139 	addi.l		&0x6000,%d1		# add new bias
12140 	andi.w		&0x7fff,%d1		# clear top bit
12141 	or.w		%d2,%d1			# concat new sign,new exp
12142 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
12143 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12144 	mov.l		(%sp)+,%d2		# restore d2
12145 	bra.w		fsqrt_sd_unfl_dis	# word branch: routine grew
12146 
12147 #
12148 # operand WILL overflow.
12149 #
12150 fsqrt_sd_ovfl:
12151 	fmov.l		&0x0,%fpsr		# clear FPSR
12152 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12153 
12154 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12155 
12156 	fmov.l		&0x0,%fpcr		# clear FPCR
12157 	fmov.l		%fpsr,%d1		# save FPSR
12158 
12159 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12160 
12161 fsqrt_sd_ovfl_tst:			# also entered from fsqrt_sd_may_ovfl
12162 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163 
12164 	mov.b		FPCR_ENABLE(%a6),%d1
12165 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12166 	bne.b		fsqrt_sd_ovfl_ena	# yes
12167 
12168 #
12169 # OVFL is not enabled; therefore, we must create the default result by
12170 # calling ovf_res().
12171 #
12172 fsqrt_sd_ovfl_dis:
12173 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12174 	sne		%d1			# set sign param accordingly
12175 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12176 	bsr.l		ovf_res			# calculate default result
12177 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
12178 	fmovm.x		(%a0),&0x80		# return default result in fp0
12179 	rts
12180 
12181 #
12182 # OVFL is enabled.
12183 # the INEX2 bit has already been updated by the round to the correct precision.
12184 # now, round to extended (USER_FPSR is not altered).
12185 #
# FP_SCR0 still holds the scaled SOURCE operand here, so it cannot serve as
# the EXOP (sqrt changes the mantissa, unlike fabs/fneg). re-execute the
# square root with the user's rounding mode and extended precision into fp1,
# store it over FP_SCR0 and re-bias that result's exponent.
# d0 (scale factor) is left untouched until fsqrt_sd_ovfl_dis reloads it.
#
12186 fsqrt_sd_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR
	fsqrt.x		FP_SCR0(%a6),%fp1	# execute square root
	fmov.l		&0x0,%fpcr		# clear FPCR
	fmovm.x		&0x40,FP_SCR0(%a6)	# store extended result
12187 	mov.l		%d2,-(%sp)		# save d2
12188 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12189 	mov.l		%d1,%d2			# make a copy
12190 	andi.l		&0x7fff,%d1		# strip sign
12191 	andi.w		&0x8000,%d2		# keep old sign
12192 	sub.l		%d0,%d1			# subtract scale factor
12193 	subi.l		&0x6000,%d1		# subtract bias
12194 	andi.w		&0x7fff,%d1		# clear top bit
12195 	or.w		%d2,%d1			# concat sign,exp
12196 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12197 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12198 	mov.l		(%sp)+,%d2		# restore d2
12199 	bra.w		fsqrt_sd_ovfl_dis	# word branch: routine grew
12200 
12201 #
12202 # the move in MAY overflow. so...
12203 #
12204 fsqrt_sd_may_ovfl:
12205 	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12206 	bne.w		fsqrt_sd_ovfl		# yes, so overflow
12207 
12208 	fmov.l		&0x0,%fpsr		# clear FPSR
12209 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12210 
12211 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12212 
12213 	fmov.l		%fpsr,%d1		# save status
12214 	fmov.l		&0x0,%fpcr		# clear FPCR
12215 
12216 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12217 
12218 	fmov.x		%fp0,%fp1		# make a copy of result
12219 	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
12220 	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
12221 
12222 # no, it didn't overflow; we have correct result
12223 	bra.w		fsqrt_sd_normal_exit
12224 
12225 ##########################################################################
12226 
12227 #
12228 # input is not normalized; what is it?
12229 # (d1 = src operand tag, loaded from STAG by fsqrt)
12230 fsqrt_not_norm:
12231 	cmpi.b		%d1,&DENORM		# weed out DENORM
12232 	beq.w		fsqrt_denorm
12233 	cmpi.b		%d1,&ZERO		# weed out ZERO
12234 	beq.b		fsqrt_zero
12235 	cmpi.b		%d1,&INF		# weed out INF
12236 	beq.b		fsqrt_inf
12237 	cmpi.b		%d1,&SNAN		# weed out SNAN
12238 	beq.l		res_snan_1op
12239 	bra.l		res_qnan_1op		# anything left is handled as a QNAN
12240 
12241 #
12242 #	fsqrt(+0) = +0
12243 #	fsqrt(-0) = -0
12244 #	fsqrt(+INF) = +INF
12245 #	fsqrt(-INF) = OPERR
12246 #
12247 fsqrt_zero:
12248 	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
12249 	bmi.b		fsqrt_zero_m		# negative
12250 fsqrt_zero_p:
12251 	fmov.s		&0x00000000,%fp0	# return +ZERO
12252 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
12253 	rts
12254 fsqrt_zero_m:
12255 	fmov.s		&0x80000000,%fp0	# return -ZERO
12256 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
12257 	rts
12258 
12259 fsqrt_inf:
12260 	tst.b		SRC_EX(%a0)		# is INF positive or negative?
12261 	bmi.l		res_operr		# negative
12262 fsqrt_inf_p:
12263 	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
12264 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
12265 	rts
12266 
12267 #########################################################################
12268 # XDEF ****************************************************************	#
12269 #	fetch_dreg(): fetch register according to index in d1		#
12270 #									#
12271 # XREF ****************************************************************	#
12272 #	None								#
12273 #									#
12274 # INPUT ***************************************************************	#
12275 #	d1 = index of register to fetch from				#
12276 #									#
12277 # OUTPUT **************************************************************	#
12278 #	d0 = value of register fetched					#
12279 #									#
12280 # ALGORITHM ***********************************************************	#
12281 #	According to the index value in d1 which can range from zero	#
12282 # to fifteen, load the corresponding register file value (where		#
12283 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
12284 # stack. The rest should still be in their original places.		#
12285 #									#
12286 #########################################################################
12287 
12288 # this routine leaves d1 intact for subsequent store_dreg calls.
12289 	global		fetch_dreg
12290 fetch_dreg:
12291 	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# fetch handler offset for index d1
12292 	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# jump to tbl_fdreg + offset
12293 
12294 tbl_fdreg:				# 16 offsets: indexes 0-7 = d0-d7, 8-15 = a0-a7
12295 	short		fdreg0 - tbl_fdreg
12296 	short		fdreg1 - tbl_fdreg
12297 	short		fdreg2 - tbl_fdreg
12298 	short		fdreg3 - tbl_fdreg
12299 	short		fdreg4 - tbl_fdreg
12300 	short		fdreg5 - tbl_fdreg
12301 	short		fdreg6 - tbl_fdreg
12302 	short		fdreg7 - tbl_fdreg
12303 	short		fdreg8 - tbl_fdreg
12304 	short		fdreg9 - tbl_fdreg
12305 	short		fdrega - tbl_fdreg
12306 	short		fdregb - tbl_fdreg
12307 	short		fdregc - tbl_fdreg
12308 	short		fdregd - tbl_fdreg
12309 	short		fdrege - tbl_fdreg
12310 	short		fdregf - tbl_fdreg
12311 
12312 fdreg0:					# d0: saved copy in the EXC_DREGS area
12313 	mov.l		EXC_DREGS+0x0(%a6),%d0
12314 	rts
12315 fdreg1:					# d1: saved copy in the EXC_DREGS area
12316 	mov.l		EXC_DREGS+0x4(%a6),%d0
12317 	rts
12318 fdreg2:					# d2-d7: read straight from the live register
12319 	mov.l		%d2,%d0
12320 	rts
12321 fdreg3:
12322 	mov.l		%d3,%d0
12323 	rts
12324 fdreg4:
12325 	mov.l		%d4,%d0
12326 	rts
12327 fdreg5:
12328 	mov.l		%d5,%d0
12329 	rts
12330 fdreg6:
12331 	mov.l		%d6,%d0
12332 	rts
12333 fdreg7:
12334 	mov.l		%d7,%d0
12335 	rts
12336 fdreg8:					# a0: saved copy in the EXC_DREGS area
12337 	mov.l		EXC_DREGS+0x8(%a6),%d0
12338 	rts
12339 fdreg9:					# a1: saved copy in the EXC_DREGS area
12340 	mov.l		EXC_DREGS+0xc(%a6),%d0
12341 	rts
12342 fdrega:					# a2-a5: read straight from the live register
12343 	mov.l		%a2,%d0
12344 	rts
12345 fdregb:
12346 	mov.l		%a3,%d0
12347 	rts
12348 fdregc:
12349 	mov.l		%a4,%d0
12350 	rts
12351 fdregd:
12352 	mov.l		%a5,%d0
12353 	rts
12354 fdrege:					# a6: longword stored at (%a6)
12355 	mov.l		(%a6),%d0
12356 	rts
12357 fdregf:					# a7: saved copy at EXC_A7
12358 	mov.l		EXC_A7(%a6),%d0
12359 	rts
12360 
12361 #########################################################################
12362 # XDEF ****************************************************************	#
12363 #	store_dreg_l(): store longword to data register specified by d1	#
12364 #									#
12365 # XREF ****************************************************************	#
12366 #	None								#
12367 #									#
12368 # INPUT ***************************************************************	#
12369 #	d0 = longword value to store					#
12370 #	d1 = index of data register to store to (0-7)			#
12371 #									#
12372 # OUTPUT **************************************************************	#
12373 #	(data register is updated)					#
12374 #									#
12375 # ALGORITHM ***********************************************************	#
12376 #	According to the index value in d1, store the longword value	#
12377 # in d0 to the corresponding data register. D0/D1 are updated in their	#
12378 # EXC_DREGS(a6) stack slots; D2-D7 are written directly. The index in	#
12379 # d1 is overwritten by the table lookup.				#
12380 #########################################################################
12381 
12382 	global		store_dreg_l
12383 store_dreg_l:
12384 	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1 = word offset stored at tbl_sdregl[d1]
12385 	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# jump to tbl_sdregl + that offset
12386 
12387 tbl_sdregl:
12388 	short		sdregl0 - tbl_sdregl	# index 0: d0
12389 	short		sdregl1 - tbl_sdregl	# index 1: d1
12390 	short		sdregl2 - tbl_sdregl	# index 2: d2
12391 	short		sdregl3 - tbl_sdregl	# index 3: d3
12392 	short		sdregl4 - tbl_sdregl	# index 4: d4
12393 	short		sdregl5 - tbl_sdregl	# index 5: d5
12394 	short		sdregl6 - tbl_sdregl	# index 6: d6
12395 	short		sdregl7 - tbl_sdregl	# index 7: d7
12396 
12397 sdregl0:
12398 	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0: overwrite saved copy at EXC_DREGS+0x0(a6)
12399 	rts
12400 sdregl1:
12401 	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1: overwrite saved copy at EXC_DREGS+0x4(a6)
12402 	rts
12403 sdregl2:
12404 	mov.l		%d0,%d2	# d2: written directly
12405 	rts
12406 sdregl3:
12407 	mov.l		%d0,%d3	# d3: written directly
12408 	rts
12409 sdregl4:
12410 	mov.l		%d0,%d4	# d4: written directly
12411 	rts
12412 sdregl5:
12413 	mov.l		%d0,%d5	# d5: written directly
12414 	rts
12415 sdregl6:
12416 	mov.l		%d0,%d6	# d6: written directly
12417 	rts
12418 sdregl7:
12419 	mov.l		%d0,%d7	# d7: written directly
12420 	rts
12421 
12422 #########################################################################
12423 # XDEF ****************************************************************	#
12424 #	store_dreg_w(): store word to data register specified by d1	#
12425 #									#
12426 # XREF ****************************************************************	#
12427 #	None								#
12428 #									#
12429 # INPUT ***************************************************************	#
12430 #	d0 = word value to store					#
12431 #	d1 = index of data register to store to (0-7)			#
12432 #									#
12433 # OUTPUT **************************************************************	#
12434 #	(data register is updated)					#
12435 #									#
12436 # ALGORITHM ***********************************************************	#
12437 #	According to the index value in d1, store the word value	#
12438 # in d0 to the low word of the corresponding data register. D0/D1 are	#
12439 # updated in their EXC_DREGS(a6) stack slots; D2-D7 are written		#
12440 # directly. The index in d1 is overwritten by the table lookup.		#
12441 #########################################################################
12442 
12443 	global		store_dreg_w
12444 store_dreg_w:
12445 	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# d1 = word offset stored at tbl_sdregw[d1]
12446 	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# jump to tbl_sdregw + that offset
12447 
12448 tbl_sdregw:
12449 	short		sdregw0 - tbl_sdregw	# index 0: d0
12450 	short		sdregw1 - tbl_sdregw	# index 1: d1
12451 	short		sdregw2 - tbl_sdregw	# index 2: d2
12452 	short		sdregw3 - tbl_sdregw	# index 3: d3
12453 	short		sdregw4 - tbl_sdregw	# index 4: d4
12454 	short		sdregw5 - tbl_sdregw	# index 5: d5
12455 	short		sdregw6 - tbl_sdregw	# index 6: d6
12456 	short		sdregw7 - tbl_sdregw	# index 7: d7
12457 
12458 sdregw0:
12459 	mov.w		%d0,2+EXC_DREGS+0x0(%a6)	# d0: offset 2 = low word of the saved longword
12460 	rts
12461 sdregw1:
12462 	mov.w		%d0,2+EXC_DREGS+0x4(%a6)	# d1: offset 2 = low word of the saved longword
12463 	rts
12464 sdregw2:
12465 	mov.w		%d0,%d2	# d2: low word written directly
12466 	rts
12467 sdregw3:
12468 	mov.w		%d0,%d3	# d3: low word written directly
12469 	rts
12470 sdregw4:
12471 	mov.w		%d0,%d4	# d4: low word written directly
12472 	rts
12473 sdregw5:
12474 	mov.w		%d0,%d5	# d5: low word written directly
12475 	rts
12476 sdregw6:
12477 	mov.w		%d0,%d6	# d6: low word written directly
12478 	rts
12479 sdregw7:
12480 	mov.w		%d0,%d7	# d7: low word written directly
12481 	rts
12482 
12483 #########################################################################
12484 # XDEF ****************************************************************	#
12485 #	store_dreg_b(): store byte to data register specified by d1	#
12486 #									#
12487 # XREF ****************************************************************	#
12488 #	None								#
12489 #									#
12490 # INPUT ***************************************************************	#
12491 #	d0 = byte value to store					#
12492 #	d1 = index of data register to store to (0-7)			#
12493 #									#
12494 # OUTPUT **************************************************************	#
12495 #	(data register is updated)					#
12496 #									#
12497 # ALGORITHM ***********************************************************	#
12498 #	According to the index value in d1, store the byte value	#
12499 # in d0 to the low byte of the corresponding data register. D0/D1 are	#
12500 # updated in their EXC_DREGS(a6) stack slots; D2-D7 are written		#
12501 # directly. The index in d1 is overwritten by the table lookup.		#
12502 #########################################################################
12503 
12504 	global		store_dreg_b
12505 store_dreg_b:
12506 	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# d1 = word offset stored at tbl_sdregb[d1]
12507 	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# jump to tbl_sdregb + that offset
12508 
12509 tbl_sdregb:
12510 	short		sdregb0 - tbl_sdregb	# index 0: d0
12511 	short		sdregb1 - tbl_sdregb	# index 1: d1
12512 	short		sdregb2 - tbl_sdregb	# index 2: d2
12513 	short		sdregb3 - tbl_sdregb	# index 3: d3
12514 	short		sdregb4 - tbl_sdregb	# index 4: d4
12515 	short		sdregb5 - tbl_sdregb	# index 5: d5
12516 	short		sdregb6 - tbl_sdregb	# index 6: d6
12517 	short		sdregb7 - tbl_sdregb	# index 7: d7
12518 
12519 sdregb0:
12520 	mov.b		%d0,3+EXC_DREGS+0x0(%a6)	# d0: offset 3 = low byte of the saved longword
12521 	rts
12522 sdregb1:
12523 	mov.b		%d0,3+EXC_DREGS+0x4(%a6)	# d1: offset 3 = low byte of the saved longword
12524 	rts
12525 sdregb2:
12526 	mov.b		%d0,%d2	# d2: low byte written directly
12527 	rts
12528 sdregb3:
12529 	mov.b		%d0,%d3	# d3: low byte written directly
12530 	rts
12531 sdregb4:
12532 	mov.b		%d0,%d4	# d4: low byte written directly
12533 	rts
12534 sdregb5:
12535 	mov.b		%d0,%d5	# d5: low byte written directly
12536 	rts
12537 sdregb6:
12538 	mov.b		%d0,%d6	# d6: low byte written directly
12539 	rts
12540 sdregb7:
12541 	mov.b		%d0,%d7	# d7: low byte written directly
12542 	rts
12543 
12544 #########################################################################
12545 # XDEF ****************************************************************	#
12546 #	inc_areg(): increment an address register by the value in d0	#
12547 #									#
12548 # XREF ****************************************************************	#
12549 #	None								#
12550 #									#
12551 # INPUT ***************************************************************	#
12552 #	d0 = amount to increment by					#
12553 #	d1 = index of address register to increment (0-7)		#
12554 #									#
12555 # OUTPUT **************************************************************	#
12556 #	(address register is updated)					#
12557 #									#
12558 # ALGORITHM ***********************************************************	#
12559 #	Typically used for an instruction w/ a post-increment <ea>,	#
12560 # this routine adds the increment value in d0 to the address register	#
12561 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
12562 # in their original places.						#
12563 #	For a7, if the increment amount is one, then we have to		#
12564 # increment by two. For any a7 update, mia7_flg is stored in		#
12565 # SPCOND_FLG(a6) so that if an access error exception occurs later in	#
12566 # emulation, this address register update can be undone.		#
12567 # The index in d1 is overwritten by the table lookup.			#
12568 #########################################################################
12569 
12570 	global		inc_areg
12571 inc_areg:
12572 	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# d1 = word offset stored at tbl_iareg[d1]
12573 	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# jump to tbl_iareg + that offset
12574 
12575 tbl_iareg:
12576 	short		iareg0 - tbl_iareg	# index 0: a0
12577 	short		iareg1 - tbl_iareg	# index 1: a1
12578 	short		iareg2 - tbl_iareg	# index 2: a2
12579 	short		iareg3 - tbl_iareg	# index 3: a3
12580 	short		iareg4 - tbl_iareg	# index 4: a4
12581 	short		iareg5 - tbl_iareg	# index 5: a5
12582 	short		iareg6 - tbl_iareg	# index 6: a6
12583 	short		iareg7 - tbl_iareg	# index 7: a7
12584 
12585 iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0: saved copy at EXC_DREGS+0x8(a6)
12586 	rts
12587 iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1: saved copy at EXC_DREGS+0xc(a6)
12588 	rts
12589 iareg2:	add.l		%d0,%a2	# a2: updated directly
12590 	rts
12591 iareg3:	add.l		%d0,%a3	# a3: updated directly
12592 	rts
12593 iareg4:	add.l		%d0,%a4	# a4: updated directly
12594 	rts
12595 iareg5:	add.l		%d0,%a5	# a5: updated directly
12596 	rts
12597 iareg6:	add.l		%d0,(%a6)	# a6: value is kept in the longword at (a6)
12598 	rts
12599 iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)	# record that a7 was post-incremented
12600 	cmpi.b		%d0,&0x1	# is the increment (low byte of d0) one?
12601 	beq.b		iareg7b	# yes; a7 must move by two instead
12602 	add.l		%d0,EXC_A7(%a6)	# no; add d0 to the a7 value at EXC_A7(a6)
12603 	rts
12604 iareg7b:
12605 	addq.l		&0x2,EXC_A7(%a6)	# a7 value at EXC_A7(a6) += 2
12606 	rts
12607 
12608 #########################################################################
12609 # XDEF ****************************************************************	#
12610 #	dec_areg(): decrement an address register by the value in d0	#
12611 #									#
12612 # XREF ****************************************************************	#
12613 #	None								#
12614 #									#
12615 # INPUT ***************************************************************	#
12616 #	d0 = amount to decrement by					#
12617 #	d1 = index of address register to decrement (0-7)		#
12618 #									#
12619 # OUTPUT **************************************************************	#
12620 #	(address register is updated)					#
12621 #									#
12622 # ALGORITHM ***********************************************************	#
12623 #	Typically used for an instruction w/ a pre-decrement <ea>,	#
12624 # this routine subtracts the decrement value in d0 from the address	#
12625 # register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
12626 # reside in their original places.					#
12627 #	For a7, if the decrement amount is one, then we have to		#
12628 # decrement by two. For any a7 update, mda7_flg is stored in		#
12629 # SPCOND_FLG(a6) so that if an access error exception occurs later in	#
12630 # emulation, this address register update can be undone.		#
12631 # The index in d1 is overwritten by the table lookup.			#
12632 #########################################################################
12633 
12634 	global		dec_areg
12635 dec_areg:
12636 	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# d1 = word offset stored at tbl_dareg[d1]
12637 	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# jump to tbl_dareg + that offset
12638 
12639 tbl_dareg:
12640 	short		dareg0 - tbl_dareg	# index 0: a0
12641 	short		dareg1 - tbl_dareg	# index 1: a1
12642 	short		dareg2 - tbl_dareg	# index 2: a2
12643 	short		dareg3 - tbl_dareg	# index 3: a3
12644 	short		dareg4 - tbl_dareg	# index 4: a4
12645 	short		dareg5 - tbl_dareg	# index 5: a5
12646 	short		dareg6 - tbl_dareg	# index 6: a6
12647 	short		dareg7 - tbl_dareg	# index 7: a7
12648 
12649 dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0: saved copy at EXC_DREGS+0x8(a6)
12650 	rts
12651 dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1: saved copy at EXC_DREGS+0xc(a6)
12652 	rts
12653 dareg2:	sub.l		%d0,%a2	# a2: updated directly
12654 	rts
12655 dareg3:	sub.l		%d0,%a3	# a3: updated directly
12656 	rts
12657 dareg4:	sub.l		%d0,%a4	# a4: updated directly
12658 	rts
12659 dareg5:	sub.l		%d0,%a5	# a5: updated directly
12660 	rts
12661 dareg6:	sub.l		%d0,(%a6)	# a6: value is kept in the longword at (a6)
12662 	rts
12663 dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)	# record that a7 was pre-decremented
12664 	cmpi.b		%d0,&0x1	# is the decrement (low byte of d0) one?
12665 	beq.b		dareg7b	# yes; a7 must move by two instead
12666 	sub.l		%d0,EXC_A7(%a6)	# no; subtract d0 from the a7 value at EXC_A7(a6)
12667 	rts
12668 dareg7b:
12669 	subq.l		&0x2,EXC_A7(%a6)	# a7 value at EXC_A7(a6) -= 2
12670 	rts
12671 
12672 ##############################################################################
12673 
12674 #########################################################################
12675 # XDEF ****************************************************************	#
12676 #	load_fpn1(): load FP register value into FP_SRC(a6).		#
12677 #									#
12678 # XREF ****************************************************************	#
12679 #	None								#
12680 #									#
12681 # INPUT ***************************************************************	#
12682 #	d0 = index of FP register to load (0-7)				#
12683 #									#
12684 # OUTPUT **************************************************************	#
12685 #	FP_SRC(a6) = value loaded from FP register file			#
12686 #	a0 = pointer to FP_SRC(a6)					#
12687 # ALGORITHM ***********************************************************	#
12688 #	Using the index in d0, copy that FP register to FP_SRC(a6) and	#
12689 # return a0 pointing at it. d0 is overwritten by the table lookup.	#
12690 # fp0/fp1 are copied from EXC_FP0/EXC_FP1(a6); fp2-fp7 use fmovm.	#
12691 #########################################################################
12692 
12693 	global		load_fpn1
12694 load_fpn1:
12695 	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# d0 = word offset stored at tbl_load_fpn1[d0]
12696 	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)	# jump to tbl_load_fpn1 + that offset
12697 
12698 tbl_load_fpn1:
12699 	short		load_fpn1_0 - tbl_load_fpn1	# index 0: fp0
12700 	short		load_fpn1_1 - tbl_load_fpn1	# index 1: fp1
12701 	short		load_fpn1_2 - tbl_load_fpn1	# index 2: fp2
12702 	short		load_fpn1_3 - tbl_load_fpn1	# index 3: fp3
12703 	short		load_fpn1_4 - tbl_load_fpn1	# index 4: fp4
12704 	short		load_fpn1_5 - tbl_load_fpn1	# index 5: fp5
12705 	short		load_fpn1_6 - tbl_load_fpn1	# index 6: fp6
12706 	short		load_fpn1_7 - tbl_load_fpn1	# index 7: fp7
12707 
12708 load_fpn1_0:
12709 	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)	# copy the 12 bytes at EXC_FP0(a6)
12710 	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)	# one longword at a time
12711 	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12712 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12713 	rts
12714 load_fpn1_1:
12715 	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)	# copy the 12 bytes at EXC_FP1(a6)
12716 	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)	# one longword at a time
12717 	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12718 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12719 	rts
12720 load_fpn1_2:
12721 	fmovm.x		&0x20, FP_SRC(%a6)	# mask 0x20 = fp2; store it to FP_SRC(a6)
12722 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12723 	rts
12724 load_fpn1_3:
12725 	fmovm.x		&0x10, FP_SRC(%a6)	# mask 0x10 = fp3; store it to FP_SRC(a6)
12726 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12727 	rts
12728 load_fpn1_4:
12729 	fmovm.x		&0x08, FP_SRC(%a6)	# mask 0x08 = fp4; store it to FP_SRC(a6)
12730 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12731 	rts
12732 load_fpn1_5:
12733 	fmovm.x		&0x04, FP_SRC(%a6)	# mask 0x04 = fp5; store it to FP_SRC(a6)
12734 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12735 	rts
12736 load_fpn1_6:
12737 	fmovm.x		&0x02, FP_SRC(%a6)	# mask 0x02 = fp6; store it to FP_SRC(a6)
12738 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12739 	rts
12740 load_fpn1_7:
12741 	fmovm.x		&0x01, FP_SRC(%a6)	# mask 0x01 = fp7; store it to FP_SRC(a6)
12742 	lea		FP_SRC(%a6), %a0	# a0 = address of FP_SRC(a6)
12743 	rts
12744 
12745 #############################################################################
12746 
12747 #########################################################################
12748 # XDEF ****************************************************************	#
12749 #	load_fpn2(): load FP register value into FP_DST(a6).		#
12750 #									#
12751 # XREF ****************************************************************	#
12752 #	None								#
12753 #									#
12754 # INPUT ***************************************************************	#
12755 #	d0 = index of FP register to load (0-7)				#
12756 #									#
12757 # OUTPUT **************************************************************	#
12758 #	FP_DST(a6) = value loaded from FP register file			#
12759 #	a0 = pointer to FP_DST(a6)					#
12760 # ALGORITHM ***********************************************************	#
12761 #	Using the index in d0, copy that FP register to FP_DST(a6) and	#
12762 # return a0 pointing at it. d0 is overwritten by the table lookup.	#
12763 # fp0/fp1 are copied from EXC_FP0/EXC_FP1(a6); fp2-fp7 use fmovm.	#
12764 #########################################################################
12765 
12766 	global		load_fpn2
12767 load_fpn2:
12768 	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# d0 = word offset stored at tbl_load_fpn2[d0]
12769 	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)	# jump to tbl_load_fpn2 + that offset
12770 
12771 tbl_load_fpn2:
12772 	short		load_fpn2_0 - tbl_load_fpn2	# index 0: fp0
12773 	short		load_fpn2_1 - tbl_load_fpn2	# index 1: fp1
12774 	short		load_fpn2_2 - tbl_load_fpn2	# index 2: fp2
12775 	short		load_fpn2_3 - tbl_load_fpn2	# index 3: fp3
12776 	short		load_fpn2_4 - tbl_load_fpn2	# index 4: fp4
12777 	short		load_fpn2_5 - tbl_load_fpn2	# index 5: fp5
12778 	short		load_fpn2_6 - tbl_load_fpn2	# index 6: fp6
12779 	short		load_fpn2_7 - tbl_load_fpn2	# index 7: fp7
12780 
12781 load_fpn2_0:
12782 	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)	# copy the 12 bytes at EXC_FP0(a6)
12783 	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)	# one longword at a time
12784 	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
12785 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12786 	rts
12787 load_fpn2_1:
12788 	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)	# copy the 12 bytes at EXC_FP1(a6)
12789 	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)	# one longword at a time
12790 	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
12791 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12792 	rts
12793 load_fpn2_2:
12794 	fmovm.x		&0x20, FP_DST(%a6)	# mask 0x20 = fp2; store it to FP_DST(a6)
12795 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12796 	rts
12797 load_fpn2_3:
12798 	fmovm.x		&0x10, FP_DST(%a6)	# mask 0x10 = fp3; store it to FP_DST(a6)
12799 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12800 	rts
12801 load_fpn2_4:
12802 	fmovm.x		&0x08, FP_DST(%a6)	# mask 0x08 = fp4; store it to FP_DST(a6)
12803 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12804 	rts
12805 load_fpn2_5:
12806 	fmovm.x		&0x04, FP_DST(%a6)	# mask 0x04 = fp5; store it to FP_DST(a6)
12807 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12808 	rts
12809 load_fpn2_6:
12810 	fmovm.x		&0x02, FP_DST(%a6)	# mask 0x02 = fp6; store it to FP_DST(a6)
12811 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12812 	rts
12813 load_fpn2_7:
12814 	fmovm.x		&0x01, FP_DST(%a6)	# mask 0x01 = fp7; store it to FP_DST(a6)
12815 	lea		FP_DST(%a6), %a0	# a0 = address of FP_DST(a6)
12816 	rts
12817 
12818 #############################################################################
12819 
12820 #########################################################################
12821 # XDEF ****************************************************************	#
12822 #	store_fpreg(): store an fp value to the fpreg designated d0.	#
12823 #									#
12824 # XREF ****************************************************************	#
12825 #	None								#
12826 #									#
12827 # INPUT ***************************************************************	#
12828 #	fp0 = extended precision value to store				#
12829 #	d0  = index of floating-point register (0-7)			#
12830 #									#
12831 # OUTPUT **************************************************************	#
12832 #	None								#
12833 #									#
12834 # ALGORITHM ***********************************************************	#
12835 #	Store the value in fp0 to the FP register designated by the	#
12836 # value in d0. The FP number can be DENORM or SNAN so we have to be	#
12837 # careful that we don't take an exception here.				#
12838 # Indexes 0/1 write the EXC_FP0/EXC_FP1(a6) slots; d0 is overwritten.	#
12839 #########################################################################
12840 
12841 	global		store_fpreg
12842 store_fpreg:
12843 	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# d0 = word offset stored at tbl_store_fpreg[d0]
12844 	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)	# jump to tbl_store_fpreg + that offset
12845 
12846 tbl_store_fpreg:
12847 	short		store_fpreg_0 - tbl_store_fpreg	# index 0: fp0
12848 	short		store_fpreg_1 - tbl_store_fpreg	# index 1: fp1
12849 	short		store_fpreg_2 - tbl_store_fpreg	# index 2: fp2
12850 	short		store_fpreg_3 - tbl_store_fpreg	# index 3: fp3
12851 	short		store_fpreg_4 - tbl_store_fpreg	# index 4: fp4
12852 	short		store_fpreg_5 - tbl_store_fpreg	# index 5: fp5
12853 	short		store_fpreg_6 - tbl_store_fpreg	# index 6: fp6
12854 	short		store_fpreg_7 - tbl_store_fpreg	# index 7: fp7
12855 
12856 store_fpreg_0:
12857 	fmovm.x		&0x80, EXC_FP0(%a6)	# mask 0x80 = fp0; write it to EXC_FP0(a6)
12858 	rts
12859 store_fpreg_1:
12860 	fmovm.x		&0x80, EXC_FP1(%a6)	# mask 0x80 = fp0; write it to EXC_FP1(a6)
12861 	rts
12862 store_fpreg_2:
12863 	fmovm.x		&0x01, -(%sp)	# push fp0 (mask bits are reversed for -(sp))
12864 	fmovm.x		(%sp)+, &0x20	# pop it into fp2 (mask 0x20)
12865 	rts
12866 store_fpreg_3:
12867 	fmovm.x		&0x01, -(%sp)	# push fp0 (mask bits are reversed for -(sp))
12868 	fmovm.x		(%sp)+, &0x10	# pop it into fp3 (mask 0x10)
12869 	rts
12870 store_fpreg_4:
12871 	fmovm.x		&0x01, -(%sp)	# push fp0 (mask bits are reversed for -(sp))
12872 	fmovm.x		(%sp)+, &0x08	# pop it into fp4 (mask 0x08)
12873 	rts
12874 store_fpreg_5:
12875 	fmovm.x		&0x01, -(%sp)	# push fp0 (mask bits are reversed for -(sp))
12876 	fmovm.x		(%sp)+, &0x04	# pop it into fp5 (mask 0x04)
12877 	rts
12878 store_fpreg_6:
12879 	fmovm.x		&0x01, -(%sp)	# push fp0 (mask bits are reversed for -(sp))
12880 	fmovm.x		(%sp)+, &0x02	# pop it into fp6 (mask 0x02)
12881 	rts
12882 store_fpreg_7:
12883 	fmovm.x		&0x01, -(%sp)	# push fp0 (mask bits are reversed for -(sp))
12884 	fmovm.x		(%sp)+, &0x01	# pop it into fp7 (mask 0x01)
12885 	rts
12886 
12887 #########################################################################
12888 # XDEF ****************************************************************	#
12889 #	get_packed(): fetch a packed operand from memory and then	#
12890 #		      convert it to a floating-point binary number.	#
12891 #									#
12892 # XREF ****************************************************************	#
12893 #	_dcalc_ea() - calculate the correct <ea>			#
12894 #	_dmem_read() - fetch the packed operand from memory		#
12895 #	facc_in_x() - the fetch failed so jump to special exit code	#
12896 #	decbin()    - convert packed to binary extended precision	#
12897 #									#
12898 # INPUT ***************************************************************	#
12899 #	None								#
12900 #									#
12901 # OUTPUT **************************************************************	#
12902 #	If no failure on _dmem_read():					#
12903 #	FP_SRC(a6) = packed operand now as a binary FP number		#
12904 #									#
12905 # ALGORITHM ***********************************************************	#
12906 #	Get the correct <ea> which is the value on the exception stack	#
12907 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
12908 # Then, fetch the operand from memory. If the fetch fails, exit		#
12909 # through facc_in_x().							#
12910 #	If the packed operand is a ZERO, NAN, or INF, return with it	#
12911 # left in FP_SRC(a6) exactly as fetched. Else, call decbin() and store	#
12912 # the extended precision result it returns in fp0 to FP_SRC(a6).	#
12913 #									#
12914 #########################################################################
12915 
12916 # the stacked <ea> for packed is correct except for -(An).
12917 # the base reg must be updated for both -(An) and (An)+.
12918 	global		get_packed
12919 get_packed:
12920 	mov.l		&0xc,%d0		# packed is 12 bytes
12921 	bsr.l		_dcalc_ea		# fetch <ea>; correct An
12922 
12923 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
12924 	mov.l		&0xc,%d0		# pass: 12 bytes
12925 	bsr.l		_dmem_read		# read packed operand
12926 
12927 	tst.l		%d1			# did dfetch fail? (d1 != 0 means failure)
12928 	bne.l		facc_in_x		# yes; leave through the access error exit
12929 
12930 # The packed operand is an INF or a NAN if the exponent field is all ones.
12931 	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp: 15 bits starting at bit offset 1
12932 	cmpi.w		%d0,&0x7fff		# INF or NAN?
12933 	bne.b		gp_try_zero		# no
12934 	rts					# operand is an INF or NAN; FP_SRC is left as fetched
12935 
12936 # The packed operand is a zero if the mantissa is all zero, else it's
12937 # a normal packed op.
12938 gp_try_zero:
12939 	mov.b		3+FP_SRC(%a6),%d0	# get byte 4 (offset 3 of the operand)
12940 	andi.b		&0x0f,%d0		# clear all but last nybble
12941 	bne.b		gp_not_spec		# not a zero
12942 	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
12943 	bne.b		gp_not_spec		# not a zero
12944 	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
12945 	bne.b		gp_not_spec		# not a zero
12946 	rts					# operand is a ZERO; FP_SRC is left as fetched
12947 gp_not_spec:
12948 	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
12949 	bsr.l		decbin			# convert to extended (result in fp0)
12950 	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop: store fp0 over the packed operand
12951 	rts
12952 
12953 #########################################################################
12954 # decbin(): Converts normalized packed bcd value pointed to by register	#
12955 #	    a0 to extended-precision value in fp0.			#
12956 #									#
12957 # INPUT ***************************************************************	#
12958 #	a0 = pointer to normalized packed bcd value			#
12959 #									#
12960 # OUTPUT **************************************************************	#
12961 #	fp0 = exact fp representation of the packed bcd value.		#
12962 #									#
12963 # ALGORITHM ***********************************************************	#
12964 #	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
12965 #	and NaN operands are dispatched without entering this routine)	#
12966 #	value in 68881/882 format at location (a0).			#
12967 #									#
12968 #	A1. Convert the bcd exponent to binary by successive adds and	#
12969 #	muls. Set the sign according to SE. Subtract 16 to compensate	#
12970 #	for the mantissa which is to be interpreted as 17 integer	#
12971 #	digits, rather than 1 integer and 16 fraction digits.		#
12972 #	Note: this operation can never overflow.			#
12973 #									#
12974 #	A2. Convert the bcd mantissa to binary by successive		#
12975 #	adds and muls in FP0. Set the sign according to SM.		#
12976 #	The mantissa digits will be converted with the decimal point	#
12977 #	assumed following the least-significant digit.			#
12978 #	Note: this operation can never overflow.			#
12979 #									#
12980 #	A3. Count the number of leading/trailing zeros in the		#
12981 #	bcd string.  If SE is positive, count the leading zeros;	#
12982 #	if negative, count the trailing zeros.  Set the adjusted	#
12983 #	exponent equal to the exponent from A1 and the zero count	#
12984 #	added if SM = 1 and subtracted if SM = 0.  Scale the		#
12985 #	mantissa the equivalent of forcing in the bcd value:		#
12986 #									#
12987 #	SM = 0	a non-zero digit in the integer position		#
12988 #	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
12989 #									#
12990 #	this will insure that any value, regardless of its		#
12991 #	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
12992 #	consistently.							#
12993 #									#
12994 #	A4. Calculate the factor 10^exp in FP1 using a table of		#
12995 #	10^(2^n) values.  To reduce the error in forming factors	#
12996 #	greater than 10^27, a directed rounding scheme is used with	#
12997 #	tables rounded to RN, RM, and RP, according to the table	#
12998 #	in the comments of the pwrten section.				#
12999 #									#
13000 #	A5. Form the final binary number by scaling the mantissa by	#
13001 #	the exponent factor.  This is done by multiplying the		#
13002 #	mantissa in FP0 by the factor in FP1 if the adjusted		#
13003 #	exponent sign is positive, and dividing FP0 by FP1 if		#
13004 #	it is negative.							#
13005 #									#
13006 #	Clean up and return. Check if the final mul or div was inexact.	#
13007 #	If so, set INEX1 in USER_FPSR.					#
13008 #									#
13009 #########################################################################
13010 
13011 #
13012 #	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013 #	to nearest, minus, and plus, respectively.  The tables include
13014 #	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13015 #	is required until the power is greater than 27, however, all
13016 #	tables include the first 5 for ease of indexing.
13017 #
13018 RTABLE:
13019 	byte		0,0,0,0
13020 	byte		2,3,2,3
13021 	byte		2,3,3,2
13022 	byte		3,2,2,3
13023 
13024 	set		FNIBS,7
13025 	set		FSTRT,0
13026 
13027 	set		ESTRT,4
13028 	set		EDIGITS,2
13029 
13030 	global		decbin
13031 decbin:
13032 	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033 	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034 	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
13035 
13036 	lea		FP_SCR0(%a6),%a0
13037 
13038 	movm.l		&0x3c00,-(%sp)		# save d2-d5
13039 	fmovm.x		&0x1,-(%sp)		# save fp1
13040 #
13041 # Calculate exponent:
13042 #  1. Copy bcd value in memory for use as a working copy.
13043 #  2. Calculate absolute value of exponent in d1 by mul and add.
13044 #  3. Correct for exponent sign.
13045 #  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046 #     (i.e., all digits assumed left of the decimal point.)
13047 #
13048 # Register usage:
13049 #
13050 #  calc_e:
13051 #	(*)  d0: temp digit storage
13052 #	(*)  d1: accumulator for binary exponent
13053 #	(*)  d2: digit count
13054 #	(*)  d3: offset pointer
13055 #	( )  d4: first word of bcd
13056 #	( )  a0: pointer to working bcd value
13057 #	( )  a6: pointer to original bcd value
13058 #	(*)  FP_SCR0: working copy of original bcd value
13059 #	(*)  L_SCR1: copy of original exponent word
13060 #
13061 calc_e:
13062 	mov.l		&EDIGITS,%d2		# # of nibbles (digits) in fraction part
13063 	mov.l		&ESTRT,%d3		# counter to pick up digits
13064 	mov.l		(%a0),%d4		# get first word of bcd
13065 	clr.l		%d1			# zero d1 for accumulator
13066 e_gd:
13067 	mulu.l		&0xa,%d1		# mul partial product by one digit place
13068 	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
13069 	add.l		%d0,%d1			# d1 = d1 + d0
13070 	addq.b		&4,%d3			# advance d3 to the next digit
13071 	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
13072 	btst		&30,%d4			# get SE
13073 	beq.b		e_pos			# don't negate if pos
13074 	neg.l		%d1			# negate before subtracting
13075 e_pos:
13076 	sub.l		&16,%d1			# sub to compensate for shift of mant
13077 	bge.b		e_save			# if still pos, do not neg
13078 	neg.l		%d1			# now negative, make pos and set SE
13079 	or.l		&0x40000000,%d4		# set SE in d4,
13080 	or.l		&0x40000000,(%a0)	# and in working bcd
13081 e_save:
13082 	mov.l		%d1,-(%sp)		# save exp on stack
13083 #
13084 #
13085 # Calculate mantissa:
13086 #  1. Calculate absolute value of mantissa in fp0 by mul and add.
13087 #  2. Correct for mantissa sign.
13088 #     (i.e., all digits assumed left of the decimal point.)
13089 #
13090 # Register usage:
13091 #
13092 #  calc_m:
13093 #	(*)  d0: temp digit storage
13094 #	(*)  d1: lword counter
13095 #	(*)  d2: digit count
13096 #	(*)  d3: offset pointer
13097 #	( )  d4: words 2 and 3 of bcd
13098 #	( )  a0: pointer to working bcd value
13099 #	( )  a6: pointer to original bcd value
13100 #	(*) fp0: mantissa accumulator
13101 #	( )  FP_SCR0: working copy of original bcd value
13102 #	( )  L_SCR1: copy of original exponent word
13103 #
13104 calc_m:
13105 	mov.l		&1,%d1			# word counter, init to 1
13106 	fmov.s		&0x00000000,%fp0	# accumulator
13107 #
13108 #
13109 #  Since the packed number has a long word between the first & second parts,
13110 #  get the integer digit then skip down & get the rest of the
13111 #  mantissa.  We will unroll the loop once.
13112 #
13113 	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
13114 	fadd.b		%d0,%fp0		# add digit to sum in fp0
13115 #
13116 #
13117 #  Get the rest of the mantissa.
13118 #
13119 loadlw:
13120 	mov.l		(%a0,%d1.L*4),%d4	# load mantissa lonqword into d4
13121 	mov.l		&FSTRT,%d3		# counter to pick up digits
13122 	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
13123 md2b:
13124 	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
13125 	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
13126 	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
13127 #
13128 #
13129 #  If all the digits (8) in that long word have been converted (d2=0),
13130 #  then inc d1 (=2) to point to the next long word and reset d3 to 0
13131 #  to initialize the digit offset, and set d2 to 7 for the digit count;
13132 #  else continue with this long word.
13133 #
13134 	addq.b		&4,%d3			# advance d3 to the next digit
13135 	dbf.w		%d2,md2b		# check for last digit in this lw
13136 nextlw:
13137 	addq.l		&1,%d1			# inc lw pointer in mantissa
13138 	cmp.l		%d1,&2			# test for last lw
13139 	ble.b		loadlw			# if not, get last one
13140 #
13141 #  Check the sign of the mant and make the value in fp0 the same sign.
13142 #
13143 m_sign:
13144 	btst		&31,(%a0)		# test sign of the mantissa
13145 	beq.b		ap_st_z			# if clear, go to append/strip zeros
13146 	fneg.x		%fp0			# if set, negate fp0
13147 #
13148 # Append/strip zeros:
13149 #
13150 #  For adjusted exponents which have an absolute value greater than 27*,
13151 #  this routine calculates the amount needed to normalize the mantissa
13152 #  for the adjusted exponent.  That number is subtracted from the exp
13153 #  if the exp was positive, and added if it was negative.  The purpose
13154 #  of this is to reduce the value of the exponent and the possibility
13155 #  of error in calculation of pwrten.
13156 #
13157 #  1. Branch on the sign of the adjusted exponent.
13158 #  2p.(positive exp)
13159 #   2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160 #   3. Add one for each zero encountered until a non-zero digit.
13161 #   4. Subtract the count from the exp.
13162 #   5. Check if the exp has crossed zero in #3 above; make the exp abs
13163 #	   and set SE.
13164 #	6. Multiply the mantissa by 10**count.
13165 #  2n.(negative exp)
13166 #   2. Check the digits in lwords 3 and 2 in descending order.
13167 #   3. Add one for each zero encountered until a non-zero digit.
13168 #   4. Add the count to the exp.
13169 #   5. Check if the exp has crossed zero in #3 above; clear SE.
13170 #   6. Divide the mantissa by 10**count.
13171 #
13172 #  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
13173 #   any adjustment due to append/strip zeros will drive the resultant
13174 #   exponent towards zero.  Since all pwrten constants with a power
13175 #   of 27 or less are exact, there is no need to use this routine to
13176 #   attempt to lessen the resultant exponent.
13177 #
13178 # Register usage:
13179 #
13180 #  ap_st_z:
13181 #	(*)  d0: temp digit storage
13182 #	(*)  d1: zero count
13183 #	(*)  d2: digit count
13184 #	(*)  d3: offset pointer
13185 #	( )  d4: first word of bcd
13186 #	(*)  d5: lword counter
13187 #	( )  a0: pointer to working bcd value
13188 #	( )  FP_SCR1: working copy of original bcd value
13189 #	( )  L_SCR1: copy of original exponent word
13190 #
13191 #
13192 # First check the absolute value of the exponent to see if this
13193 # routine is necessary.  If so, then check the sign of the exponent
13194 # and do append (+) or strip (-) zeros accordingly.
13195 # This section handles a positive adjusted exponent.
13196 #
# ap_st_z: entry to append/strip zeros; this part is the positive-exponent side.
#   - If the adjusted exponent at (%sp) is <= 27, go straight to pwrten.
#   - If SE (bit 30 of lword 1) is set, go to ap_st_n.
#   - Otherwise count leading zero digits of the mantissa in d1: M16 first,
#     then the eight digits of lword 2, then those of lword 3.
#   - ap_p_fx subtracts the count from the exponent.  If the result is
#     negative it is negated and SE is set in d4 and in memory.
# Leaves d0 = zero count and d1 = adjusted exponent for ap_p_fm.
ap_st_z:
	mov.l		(%sp),%d1		# load expA for range test
	cmp.l		%d1,&27			# compare expA with 27
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)		# check sign of exp
	bne.b		ap_st_n			# if neg, go to neg side
	clr.l		%d1			# zero count reg
	mov.l		(%a0),%d4		# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0		# get M16 in d0
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
	addq.l		&1,%d1			# inc zero count
	mov.l		&1,%d5			# init lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
	bne.b		ap_p_cl			# if lw 2 is non-zero, scan its digits
	addq.l		&8,%d1			# lw 2 is all zeros: count its 8 digits
	addq.l		&1,%d5			# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
ap_p_cl:
	clr.l		%d3			# init offset reg
	mov.l		&7,%d2			# init digit counter
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_p_fx			# if non-zero, go to fix exp
	addq.l		&4,%d3			# point to next digit
	addq.l		&1,%d1			# inc zero count
	dbf.w		%d2,ap_p_gd		# get next digit
ap_p_fx:
	mov.l		%d1,%d0			# copy zero count to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bge.b		ap_p_fm			# if still non-negative, go fix mantissa
	neg.l		%d1			# now its neg; get abs
	mov.l		(%a0),%d4		# load lword 1 to d4
	or.l		&0x40000000,%d4		# and set SE in d4
	or.l		&0x40000000,(%a0)	# and in memory
13232 #
13233 # Calculate the mantissa multiplier to compensate for the striping of
13234 # zeros from the mantissa.
13235 #
13236 ap_p_fm:
13237 	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13238 	clr.l		%d3			# init table index
13239 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13240 	mov.l		&3,%d2			# init d2 to count bits in counter
13241 ap_p_el:
13242 	asr.l		&1,%d0			# shift lsb into carry
13243 	bcc.b		ap_p_en			# if 1, mul fp1 by pwrten factor
13244 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13245 ap_p_en:
13246 	add.l		&12,%d3			# inc d3 to next rtable entry
13247 	tst.l		%d0			# check if d0 is zero
13248 	bne.b		ap_p_el			# if not, get next bit
13249 	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
13250 	bra.b		pwrten			# go calc pwrten
13251 #
13252 # This section handles a negative adjusted exponent.
13253 #
13254 ap_st_n:
13255 	clr.l		%d1			# clr counter
13256 	mov.l		&2,%d5			# set up d5 to point to lword 3
13257 	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
13258 	bne.b		ap_n_cl			# if not zero, check digits
13259 	sub.l		&1,%d5			# dec d5 to point to lword 2
13260 	addq.l		&8,%d1			# inc counter by 8
13261 	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
13262 ap_n_cl:
13263 	mov.l		&28,%d3			# point to last digit
13264 	mov.l		&7,%d2			# init digit counter
13265 ap_n_gd:
13266 	bfextu		%d4{%d3:&4},%d0		# get digit
13267 	bne.b		ap_n_fx			# if non-zero, go to exp fix
13268 	subq.l		&4,%d3			# point to previous digit
13269 	addq.l		&1,%d1			# inc digit counter
13270 	dbf.w		%d2,ap_n_gd		# get next digit
13271 ap_n_fx:
13272 	mov.l		%d1,%d0			# copy counter to d0
13273 	mov.l		(%sp),%d1		# get adjusted exp from memory
13274 	sub.l		%d0,%d1			# subtract count from exp
13275 	bgt.b		ap_n_fm			# if still pos, go fix mantissa
13276 	neg.l		%d1			# take abs of exp and clr SE
13277 	mov.l		(%a0),%d4		# load lword 1 to d4
13278 	and.l		&0xbfffffff,%d4		# and clr SE in d4
13279 	and.l		&0xbfffffff,(%a0)	# and in memory
13280 #
13281 # Calculate the mantissa multiplier to compensate for the appending of
13282 # zeros to the mantissa.
13283 #
13284 ap_n_fm:
13285 	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13286 	clr.l		%d3			# init table index
13287 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13288 	mov.l		&3,%d2			# init d2 to count bits in counter
13289 ap_n_el:
13290 	asr.l		&1,%d0			# shift lsb into carry
13291 	bcc.b		ap_n_en			# if 1, mul fp1 by pwrten factor
13292 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13293 ap_n_en:
13294 	add.l		&12,%d3			# inc d3 to next rtable entry
13295 	tst.l		%d0			# check if d0 is zero
13296 	bne.b		ap_n_el			# if not, get next bit
13297 	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
13298 #
13299 #
13300 # Calculate power-of-ten factor from adjusted and shifted exponent.
13301 #
13302 # Register usage:
13303 #
13304 #  pwrten:
13305 #	(*)  d0: temp
13306 #	( )  d1: exponent
13307 #	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308 #	(*)  d3: FPCR work copy
13309 #	( )  d4: first word of bcd
13310 #	(*)  a1: RTABLE pointer
13311 #  calc_p:
13312 #	(*)  d0: temp
13313 #	( )  d1: exponent
13314 #	(*)  d3: PWRTxx table index
13315 #	( )  a0: pointer to working copy of bcd
13316 #	(*)  a1: PWRTxx pointer
13317 #	(*) fp1: power-of-ten accumulator
13318 #
13319 # Pwrten calculates the exponent factor in the selected rounding mode
13320 # according to the following table:
13321 #
13322 #	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13323 #
13324 #	ANY	  ANY	RN	RN
13325 #
13326 #	 +	   +	RP	RP
13327 #	 -	   +	RP	RM
13328 #	 +	   -	RP	RM
13329 #	 -	   -	RP	RP
13330 #
13331 #	 +	   +	RM	RM
13332 #	 -	   +	RM	RP
13333 #	 +	   -	RM	RP
13334 #	 -	   -	RM	RM
13335 #
13336 #	 +	   +	RZ	RM
13337 #	 -	   +	RZ	RM
13338 #	 +	   -	RZ	RP
13339 #	 -	   -	RZ	RP
13340 #
13341 #
# pwrten: choose the rounding mode used to build the power-of-ten factor.
# Forms the 4-bit index {FPCR[5:4],SM,SE} from the user's FPCR rounding
# bits and the top two bits of bcd lword 1, looks the new mode up in
# RTABLE, loads an FPCR that contains only those rounding bits, and points
# a1 at the matching table: PTENRP, PTENRM or PTENRN.
# d1 (the exponent) is not touched here; calc_p consumes it.
pwrten:
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
	asl.l		&2,%d2			# format d2 to be
	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
	add.l		%d0,%d2			# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1		# load rtable base
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
	clr.l		%d3			# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr		# write new FPCR
	asr.l		&1,%d0			# bit 0 of new mode -> carry
	bcc.b		not_rp			# bit 0 clear: not RP
	lea.l		PTENRP(%pc),%a1		# it is RP
	bra.b		calc_p			# go to init section
not_rp:
	asr.l		&1,%d0			# bit 1 of new mode -> carry
	bcc.b		not_rm			# bit 1 clear: not RM
	lea.l		PTENRM(%pc),%a1		# it is RM
	bra.b		calc_p			# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1		# it is RN
# calc_p: build fp1 = 10**abs(d1) from the table selected in a1.
# If d1 is negative its magnitude is used and SE is set in the bcd word
# at (a0).  The magnitude is consumed lsb first; each set bit multiplies
# fp1 by the next 12-byte table entry.  Falls through into pnorm.
calc_p:
	mov.l		%d1,%d0			# copy exp to d0;use d0
	bpl.b		no_neg			# if exp is negative,
	neg.l		%d0			# invert it
	or.l		&0x40000000,(%a0)	# and set SE bit
no_neg:
	clr.l		%d3			# table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
e_loop:
	asr.l		&1,%d0			# shift next bit into carry
	bcc.b		e_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
e_next:
	add.l		&12,%d3			# inc d3 to next table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		e_loop			# not zero, continue shifting
13381 #
13382 #
13383 #  Check the sign of the adjusted exp and make the value in fp0 the
13384 #  same sign. If the exp was pos then multiply fp1*fp0;
13385 #  else divide fp0/fp1.
13386 #
13387 # Register Usage:
13388 #  norm:
13389 #	( )  a0: pointer to working bcd value
13390 #	(*) fp0: mantissa accumulator
13391 #	( ) fp1: scaling factor - 10**(abs(exp))
13392 #
13393 pnorm:
13394 	btst		&30,(%a0)		# test the sign of the exponent
13395 	beq.b		mul			# if clear, go to multiply
13396 div:
13397 	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by exp
13398 	bra.b		end_dec
13399 mul:
13400 	fmul.x		%fp1,%fp0		# exp is positive, so multiply by exp
13401 #
13402 #
13403 # Clean up and return with result in fp0.
13404 #
13405 # If the final mul/div in decbin incurred an inex exception,
13406 # it will be inex2, but will be reported as inex1 by get_op.
13407 #
13408 end_dec:
13409 	fmov.l		%fpsr,%d0		# get status register
13410 	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
13411 	beq.b		no_exc			# skip this if no exc
13412 	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13413 no_exc:
13414 	add.l		&0x4,%sp		# clear 1 lw param
13415 	fmovm.x		(%sp)+,&0x40		# restore fp1
13416 	movm.l		(%sp)+,&0x3c		# restore d2-d5
13417 	fmov.l		&0x0,%fpcr
13418 	fmov.l		&0x0,%fpsr
13419 	rts
13420 
13421 #########################################################################
13422 # bindec(): Converts an input in extended precision format to bcd format#
13423 #									#
13424 # INPUT ***************************************************************	#
13425 #	a0 = pointer to the input extended precision value in memory.	#
13426 #	     the input may be either normalized, unnormalized, or	#
13427 #	     denormalized.						#
13428 #	d0 = contains the k-factor sign-extended to 32-bits.		#
13429 #									#
13430 # OUTPUT **************************************************************	#
13431 #	FP_SCR0(a6) = bcd format result on the stack.			#
13432 #									#
13433 # ALGORITHM ***********************************************************	#
13434 #									#
13435 #	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
13436 #		The k-factor is saved for use in d7. Clear the		#
13437 #		BINDEC_FLG for separating normalized/denormalized	#
13438 #		input.  If input is unnormalized or denormalized,	#
13439 #		normalize it.						#
13440 #									#
13441 #	A2.	Set X = abs(input).					#
13442 #									#
13443 #	A3.	Compute ILOG.						#
13444 #		ILOG is the log base 10 of the input value.  It is	#
13445 #		approximated by adding e + 0.f when the original	#
13446 #		value is viewed as 2^^e * 1.f in extended precision.	#
13447 #		This value is stored in d6.				#
13448 #									#
13449 #	A4.	Clr INEX bit.						#
13450 #		The operation in A3 above may have set INEX2.		#
13451 #									#
13452 #	A5.	Set ICTR = 0;						#
13453 #		ICTR is a flag used in A13.  It must be set before the	#
13454 #		loop entry A6.						#
13455 #									#
13456 #	A6.	Calculate LEN.						#
13457 #		LEN is the number of digits to be displayed.  The	#
13458 #		k-factor can dictate either the total number of digits,	#
13459 #		if it is a positive number, or the number of digits	#
13460 #		after the decimal point which are to be included as	#
13461 #		significant.  See the 68882 manual for examples.	#
13462 #		If LEN is computed to be greater than 17, set OPERR in	#
13463 #		USER_FPSR.  LEN is stored in d4.			#
13464 #									#
13465 #	A7.	Calculate SCALE.					#
13466 #		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
13467 #		of decimal places needed to insure LEN integer digits	#
13468 #		in the output before conversion to bcd. LAMBDA is the	#
13469 #		sign of ISCALE, used in A9. Fp1 contains		#
13470 #		10^^(abs(ISCALE)) using a rounding mode which is a	#
13471 #		function of the original rounding mode and the signs	#
13472 #		of ISCALE and X.  A table is given in the code.		#
13473 #									#
13474 #	A8.	Clr INEX; Force RZ.					#
13475 #		The operation in A3 above may have set INEX2.		#
13476 #		RZ mode is forced for the scaling operation to insure	#
13477 #		only one rounding error.  The grs bits are collected in #
13478 #		the INEX flag for use in A10.				#
13479 #									#
13480 #	A9.	Scale X -> Y.						#
13481 #		The mantissa is scaled to the desired number of		#
13482 #		significant digits.  The excess digits are collected	#
13483 #		in INEX2.						#
13484 #									#
13485 #	A10.	Or in INEX.						#
13486 #		If INEX is set, round error occurred.  This is		#
13487 #		compensated for by 'or-ing' in the INEX2 flag to	#
13488 #		the lsb of Y.						#
13489 #									#
13490 #	A11.	Restore original FPCR; set size ext.			#
13491 #		Perform FINT operation in the user's rounding mode.	#
13492 #		Keep the size to extended.				#
13493 #									#
13494 #	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
13495 #		mode.  The FPSP routine sintd0 is used.  The output	#
13496 #		is in fp0.						#
13497 #									#
13498 #	A13.	Check for LEN digits.					#
13499 #		If the int operation results in more than LEN digits,	#
13500 #		or less than LEN -1 digits, adjust ILOG and repeat from	#
13501 #		A6.  This test occurs only on the first pass.  If the	#
13502 #		result is exactly 10^LEN, decrement ILOG and divide	#
13503 #		the mantissa by 10.					#
13504 #									#
13505 #	A14.	Convert the mantissa to bcd.				#
13506 #		The binstr routine is used to convert the LEN digit	#
13507 #		mantissa to bcd in memory.  The input to binstr is	#
13508 #		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
13509 #		such that the decimal point is to the left of bit 63.	#
13510 #		The bcd digits are stored in the correct position in	#
13511 #		the final string area in memory.			#
13512 #									#
13513 #	A15.	Convert the exponent to bcd.				#
13514 #		As in A14 above, the exp is converted to bcd and the	#
13515 #		digits are stored in the final string.			#
13516 #		Test the length of the final exponent string.  If the	#
13517 #		length is 4, set operr.					#
13518 #									#
13519 #	A16.	Write sign bits to final string.			#
13520 #									#
13521 #########################################################################
13522 
set	BINDEC_FLG,	EXC_TEMP	# DENORM flag

# Constants in extended precision
# Each entry is four longwords; an extended operand occupies the first
# three, so the trailing zero longword is not part of the value.
# PLOG2 is approximately log10(2) = 0.30103.  PLOG2UP1 is the same value
# with the mantissa one lsb larger; A3 uses it when the value is negative.
PLOG2:
	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
PLOG2UP1:
	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000

# Constants in single precision
# Only the first longword of each entry is the single-precision value.
# FONE = 1.0, FTWO = 2.0, FTEN = 10.0, F4933 = 4933.0
FONE:
	long		0x3F800000,0x00000000,0x00000000,0x00000000
FTWO:
	long		0x40000000,0x00000000,0x00000000,0x00000000
FTEN:
	long		0x41200000,0x00000000,0x00000000,0x00000000
F4933:
	long		0x459A2800,0x00000000,0x00000000,0x00000000

# RBDTBL: rounding mode to use for the A7 power-of-ten calculation.
# Index = {FPCR[5:4],LAMBDA,SIGN(X)}; one row per user rounding mode in
# the order RN, RZ, RM, RP.  Values: 0 = RN, 2 = RM, 3 = RP.
RBDTBL:
	byte		0,0,0,0
	byte		3,3,2,2
	byte		3,2,2,3
	byte		2,3,3,2
13546 
13547 #	Implementation Notes:
13548 #
13549 #	The registers are used as follows:
13550 #
13551 #		d0: scratch; LEN input to binstr
13552 #		d1: scratch
13553 #		d2: upper 32-bits of mantissa for binstr
13554 #		d3: scratch;lower 32-bits of mantissa for binstr
13555 #		d4: LEN
13556 #		d5: LAMBDA/ICTR
13557 #		d6: ILOG
13558 #		d7: k-factor
13559 #		a0: ptr for original operand/final result
13560 #		a1: scratch pointer
13561 #		a2: pointer to FP_X; abs(original value) in ext
13562 #		fp0: scratch
13563 #		fp1: scratch
13564 #		fp2: scratch
13565 #		F_SCR1:
13566 #		F_SCR2:
13567 #		L_SCR1:
13568 #		L_SCR2:
13569 
# bindec entry.
#   In:  a0 = pointer to the extended precision input in memory
#        d0 = k-factor, sign-extended to 32 bits
#   Saves d2-d7/a2 and fp0-fp2 on the stack before doing any work.
	global		bindec
bindec:
	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}

# A1. Set RM and size ext. Set SIGMA = sign input;
#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
#     separating  normalized/denormalized input.  If the input
#     is a denormalized number, set the BINDEC_FLG memory word
#     to signal denorm.  If the input is unnormalized, normalize
#     the input and test for denormalized result.
#
	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
	mov.l		(%a0),L_SCR2(%a6)	# save sign/exponent longword for sign check
	mov.l		%d0,%d7		# move k-factor to d7

	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
	bne.w		A2_str		# no; input is a NORM
13589 
13590 #
13591 # Normalize the denorm
13592 #
13593 un_de_norm:
13594 	mov.w		(%a0),%d0
13595 	and.w		&0x7fff,%d0	# strip sign of normalized exp
13596 	mov.l		4(%a0),%d1
13597 	mov.l		8(%a0),%d2
13598 norm_loop:
13599 	sub.w		&1,%d0
13600 	lsl.l		&1,%d2
13601 	roxl.l		&1,%d1
13602 	tst.l		%d1
13603 	bge.b		norm_loop
13604 #
13605 # Test if the normalized input is denormalized
13606 #
13607 	tst.w		%d0
13608 	bgt.b		pos_exp		# if greater than zero, it is a norm
13609 	st		BINDEC_FLG(%a6)	# set flag for denorm
13610 pos_exp:
13611 	and.w		&0x7fff,%d0	# strip sign of normalized exp
13612 	mov.w		%d0,(%a0)
13613 	mov.l		%d1,4(%a0)
13614 	mov.l		%d2,8(%a0)
13615 
# A2. Set X = abs(input).
#     The three longwords of the operand are copied to FP_SCR1 and the
#     sign bit of the copy is cleared; the operand at (a0) is left as is.
#
A2_str:
	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
13623 
13624 # A3. Compute ILOG.
13625 #     ILOG is the log base 10 of the input value.  It is approx-
13626 #     imated by adding e + 0.f when the original value is viewed
13627 #     as 2^^e * 1.f in extended precision.  This value is stored
13628 #     in d6.
13629 #
13630 # Register usage:
13631 #	Input/Output
13632 #	d0: k-factor/exponent
13633 #	d2: x/x
13634 #	d3: x/x
13635 #	d4: x/x
13636 #	d5: x/x
13637 #	d6: x/ILOG
13638 #	d7: k-factor/Unchanged
13639 #	a0: ptr for original operand/final result
13640 #	a1: x/x
13641 #	a2: x/x
13642 #	fp0: x/float(ILOG)
13643 #	fp1: x/x
13644 #	fp2: x/x
13645 #	F_SCR1:x/x
13646 #	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647 #	L_SCR1:x/x
13648 #	L_SCR2:first word of X packed/Unchanged
13649 
# A3 code.  For a denorm ILOG is forced to -4933.  Otherwise the exponent
# word of the abs(X) copy in FP_SCR1 is replaced by 0x3fff, giving 1.f;
# the unbiased exponent e is added and 1.0 subtracted, giving e + 0.f.
# That value is scaled by PLOG2 when it is non-negative and by PLOG2UP1
# when it is negative, then converted to a longword in d6.
	tst.b		BINDEC_FLG(%a6)	# check for denorm
	beq.b		A3_cont		# if clr, continue with norm
	mov.l		&-4933,%d6	# force ILOG = -4933
	bra.b		A4_str
A3_cont:
	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
	sub.w		&0x3fff,%d0	# strip off bias
	fadd.w		%d0,%fp0	# add in exp
	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
	fbge.w		pos_res		# if pos, branch
	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
	bra.b		A4_str		# go move out ILOG
pos_res:
	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
13668 
13669 
# A4. Clr INEX bit.
#     The operation in A3 above may have set INEX2.
#     The whole FPSR is cleared, not just the INEX2 bit.

A4_str:
	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed


# A5. Set ICTR = 0;
#     ICTR is a flag used in A13.  It must be set before the
#     loop entry A6. The lower word of d5 is used for ICTR.

	clr.w		%d5		# clear ICTR
13682 
13683 # A6. Calculate LEN.
13684 #     LEN is the number of digits to be displayed.  The k-factor
13685 #     can dictate either the total number of digits, if it is
13686 #     a positive number, or the number of digits after the
13687 #     original decimal point which are to be included as
13688 #     significant.  See the 68882 manual for examples.
13689 #     If LEN is computed to be greater than 17, set OPERR in
13690 #     USER_FPSR.  LEN is stored in d4.
13691 #
13692 # Register usage:
13693 #	Input/Output
13694 #	d0: exponent/Unchanged
13695 #	d2: x/x/scratch
13696 #	d3: x/x
13697 #	d4: exc picture/LEN
13698 #	d5: ICTR/Unchanged
13699 #	d6: ILOG/Unchanged
13700 #	d7: k-factor/Unchanged
13701 #	a0: ptr for original operand/final result
13702 #	a1: x/x
13703 #	a2: x/x
13704 #	fp0: float(ILOG)/Unchanged
13705 #	fp1: x/x
13706 #	fp2: x/x
13707 #	F_SCR1:x/x
13708 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709 #	L_SCR1:x/x
13710 #	L_SCR2:first word of X packed/Unchanged
13711 
# A6_str: compute LEN in d4.
#   k > 0 : LEN = k
#   k <= 0: LEN = ILOG + 1 - k
# LEN is then clamped to the range 1..17.  OPERR and AIOP are set in
# USER_FPSR only when LEN exceeded 17 and k was positive.
A6_str:
	tst.l		%d7		# branch on sign of k
	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
	mov.l		%d7,%d4		# if k > 0, LEN = k
	bra.b		len_ck		# skip to LEN check
k_neg:
	mov.l		%d6,%d4		# first load ILOG to d4
	sub.l		%d7,%d4		# subtract off k
	addq.l		&1,%d4		# add in the 1
len_ck:
	tst.l		%d4		# LEN check: branch on sign of LEN
	ble.b		LEN_ng		# if LEN <= 0, set LEN = 1
	cmp.l		%d4,&17		# test if LEN > 17
	ble.b		A7_str		# if not, forget it
	mov.l		&17,%d4		# set max LEN = 17
	tst.l		%d7		# OPERR is only set for a positive k
	ble.b		A7_str		# if k <= 0, skip setting OPERR
	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
	bra.b		A7_str		# finished here
LEN_ng:
	mov.l		&1,%d4		# min LEN is 1
13733 
13734 
13735 # A7. Calculate SCALE.
13736 #     SCALE is equal to 10^ISCALE, where ISCALE is the number
13737 #     of decimal places needed to insure LEN integer digits
13738 #     in the output before conversion to bcd. LAMBDA is the sign
13739 #     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13740 #     the rounding mode as given in the following table (see
13741 #     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742 #     of opposite sign in bindec.sa from Coonen).
13743 #
13744 #	Initial					USE
13745 #	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
13746 #	----------------------------------------------
13747 #	 RN	00	   0	   0		00/0	RN
13748 #	 RN	00	   0	   1		00/0	RN
13749 #	 RN	00	   1	   0		00/0	RN
13750 #	 RN	00	   1	   1		00/0	RN
13751 #	 RZ	01	   0	   0		11/3	RP
13752 #	 RZ	01	   0	   1		11/3	RP
13753 #	 RZ	01	   1	   0		10/2	RM
13754 #	 RZ	01	   1	   1		10/2	RM
13755 #	 RM	10	   0	   0		11/3	RP
13756 #	 RM	10	   0	   1		10/2	RM
13757 #	 RM	10	   1	   0		10/2	RM
13758 #	 RM	10	   1	   1		11/3	RP
13759 #	 RP	11	   0	   0		10/2	RM
13760 #	 RP	11	   0	   1		11/3	RP
13761 #	 RP	11	   1	   0		11/3	RP
13762 #	 RP	11	   1	   1		10/2	RM
13763 #
13764 # Register usage:
13765 #	Input/Output
13766 #	d0: exponent/scratch - final is 0
13767 #	d2: x/0 or 24 for A9
13768 #	d3: x/scratch - offset ptr into PTENRM array
13769 #	d4: LEN/Unchanged
13770 #	d5: 0/ICTR:LAMBDA
13771 #	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772 #	d7: k-factor/Unchanged
13773 #	a0: ptr for original operand/final result
13774 #	a1: x/ptr to PTENRM array
13775 #	a2: x/x
13776 #	fp0: float(ILOG)/Unchanged
13777 #	fp1: x/10^ISCALE
13778 #	fp2: x/x
13779 #	F_SCR1:x/x
13780 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781 #	L_SCR1:x/x
13782 #	L_SCR2:first word of X packed/Unchanged
13783 
# A7_str: compute ISCALE = ILOG + 1 - LEN in d0 and set LAMBDA.
#   - When k <= 0 and ILOG <= k, ILOG is replaced by k first.
#   - d5 is swapped so that ICTR moves to the upper word; the lower word
#     then holds LAMBDA (0 if ISCALE >= 0, 1 if ISCALE < 0).
#   - When ISCALE <= -4908, 24 is added to it and d2 is set to 24 so that
#     A9 can apply the remaining 10^24 separately; otherwise d2.w is 0.
#   - d0 is left holding abs(ISCALE) for the power-of-ten loop.
A7_str:
	tst.l		%d7		# test sign of k
	bgt.b		k_pos		# if pos and > 0, skip this
	cmp.l		%d7,%d6		# test k - ILOG
	blt.b		k_pos		# if k < ILOG, skip this
	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
k_pos:
	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
	addq.l		&1,%d0		# add the 1
	sub.l		%d4,%d0		# sub off LEN
	swap		%d5		# move ICTR to upper word; lower word is LAMBDA
	clr.w		%d5		# set LAMBDA zero initially
	clr.w		%d2		# set up d2 for very small case
	tst.l		%d0		# test sign of ISCALE
	bge.b		iscale		# if pos, skip next inst
	addq.w		&1,%d5		# if neg, set LAMBDA true
	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
	bgt.b		no_inf		# if false, skip rest
	add.l		&24,%d0		# add in 24 to iscale
	mov.l		&24,%d2		# put 24 in d2 for A9
no_inf:
	neg.l		%d0		# and take abs of ISCALE
# iscale: build fp1 = 10^abs(ISCALE) in the rounding mode chosen from
# RBDTBL.
#   In:  d0 = abs(ISCALE), d5.w = LAMBDA, L_SCR2 = sign/exponent longword
#        of the original operand, USER_FPCR = user's rounding mode.
#   Out: fp1 = 10^abs(ISCALE), a1 = power-of-ten table used, d0 = 0,
#        FPCR = selected rounding mode only.
#   Uses d1, d3 and a2 as scratch.
# The RBDTBL index is {FPCR[5:4],LAMBDA,SIGN(X)}.  abs(ISCALE) is consumed
# lsb first; each set bit multiplies fp1 by the next 12-byte table entry.
iscale:
	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
	lsl.w		&1,%d1		# put them in bits 2:1
	add.w		%d5,%d1		# add in LAMBDA
	lsl.w		&1,%d1		# put them in bits 3:1
	tst.l		L_SCR2(%a6)	# test sign of original x
	bge.b		x_pos		# if pos, don't set bit 0
	addq.l		&1,%d1		# if neg, set bit 0
x_pos:
	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
# The byte load below only replaces the low 8 bits of d3, and on the first
# pass d3 still holds the caller's value.  Clear it first so that no stale
# bits are shifted into the exception-enable byte of the FPCR; decbin's
# pwrten clears d3 for the same reason.
	clr.l		%d3		# force no exc enables and ext precision
	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
	lsl.l		&4,%d3		# put bits in proper position
	fmov.l		%d3,%fpcr	# load bits into fpu
	lsr.l		&4,%d3		# move rmode back to bits 1:0
	tst.b		%d3		# decode new rmode for pten table
	bne.b		not_rn		# if zero, it is RN
	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
	bra.b		rmode		# exit decode
not_rn:
	lsr.b		&1,%d3		# get lsb in carry
	bcc.b		not_rp2		# if carry clear, it is RM
	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
	bra.b		rmode		# exit decode
not_rp2:
	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
rmode:
	clr.l		%d3		# clr table index
e_loop2:
	lsr.l		&1,%d0		# shift next bit into carry
	bcc.b		e_next2		# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
e_next2:
	add.l		&12,%d3		# inc d3 to next pwrten table entry
	tst.l		%d0		# test if ISCALE is zero
	bne.b		e_loop2		# if not, loop
13842 
# A8. Clr INEX; Force RZ.
#     The power-of-ten multiplies in A7 above may have set INEX2
#     (A4 already cleared anything left over from A3).
#     RZ mode is forced for the scaling operation to insure
#     only one rounding error.  The grs bits are collected in
#     the INEX flag for use in A10.
#
# Register usage:
#	Input/Output
#	(no general registers are changed; only FPSR and FPCR are written)

	fmov.l		&0,%fpsr	# clr INEX (clears the whole FPSR)
	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
13854 
13855 # A9. Scale X -> Y.
13856 #     The mantissa is scaled to the desired number of significant
13857 #     digits.  The excess digits are collected in INEX2. If mul,
13858 #     Check d2 for excess 10 exponential value.  If not zero,
13859 #     the iscale value would have caused the pwrten calculation
13860 #     to overflow.  Only a negative iscale can cause this, so
13861 #     multiply by 10^(d2), which is now only allowed to be 24,
13862 #     with a multiply by 10^8 and 10^16, which is exact since
13863 #     10^24 is exact.  If the input was denormalized, we must
13864 #     create a busy stack frame with the mul command and the
13865 #     two operands, and allow the fpu to complete the multiply.
13866 #
13867 # Register usage:
13868 #	Input/Output
13869 #	d0: FPCR with RZ mode/Unchanged
13870 #	d2: 0 or 24/unchanged
13871 #	d3: x/x
13872 #	d4: LEN/Unchanged
13873 #	d5: ICTR:LAMBDA
13874 #	d6: ILOG/Unchanged
13875 #	d7: k-factor/Unchanged
13876 #	a0: ptr for original operand/final result
13877 #	a1: ptr to PTENRM array/Unchanged
13878 #	a2: x/x
13879 #	fp0: float(ILOG)/X adjusted for SCALE (Y)
13880 #	fp1: 10^ISCALE/Unchanged
13881 #	fp2: x/x
13882 #	F_SCR1:x/x
13883 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884 #	L_SCR1:x/x
13885 #	L_SCR2:first word of X packed/Unchanged
13886 
# A9_str: load abs(X) into fp0 and scale it by fp1.
#   LAMBDA = 0 (ISCALE >= 0): Y = X / 10^ISCALE, then on to A10.
#   LAMBDA = 1 (ISCALE <  0): go to sc_mul and scale by multiplication.
A9_str:
	fmov.x		(%a0),%fp0	# load X from memory
	fabs.x		%fp0		# use abs(X)
	tst.w		%d5		# LAMBDA is in lower word of d5
	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
	bra.w		A10_st		# branch to A10
13894 
# sc_mul: scale by multiplication (LAMBDA = 1).
# Normalized inputs go to A9_norm.  Denormalized inputs are handled here.
sc_mul:
	tst.b		BINDEC_FLG(%a6)	# check for denorm
	beq.w		A9_norm		# if norm, continue with mul

# for DENORM, we must calculate:
#	fp0 = input_op * 10^ISCALE * 10^24
# since the input operand is a DENORM, we can't multiply it directly.
# so, we do the multiplication of the exponents and mantissas separately.
# in this way, we avoid underflow on intermediate stages of the
# multiplication and guarantee a result without exception.
#
# d3 accumulates the biased exponent of the product: the exponents of
# 10^ISCALE, the operand, 10^8 and 10^16 are added, and the bias is
# subtracted three times.  That exponent is patched into the saved copy
# of 10^ISCALE on the stack.  The other three factors are multiplied in
# with their exponents forced to 0x3fff.
	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack

	mov.w		(%sp),%d3	# grab exponent
	andi.w		&0x7fff,%d3	# clear sign
	ori.w		&0x8000,(%a0)	# make DENORM exp negative
	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
	subi.w		&0x3fff,%d3	# subtract BIAS
	add.w		36(%a1),%d3	# add exponent of 10^8 (table entry 3)
	subi.w		&0x3fff,%d3	# subtract BIAS
	add.w		48(%a1),%d3	# add exponent of 10^16 (table entry 4)
	subi.w		&0x3fff,%d3	# subtract BIAS

	bmi.w		sc_mul_err	# if result is DENORM, punt!!!

	andi.w		&0x8000,(%sp)	# keep sign
	or.w		%d3,(%sp)	# insert new exponent
	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
	mov.l		0x4(%a0),-(%sp)
	mov.l		&0x3fff0000,-(%sp) # force exp to zero
	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
	fmul.x		(%sp)+,%fp0	# multiply by 10^ISCALE carrying the summed exponent

#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
	mov.l		36+4(%a1),-(%sp)
	mov.l		&0x3fff0000,-(%sp) # force exp to zero
	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
	mov.l		48+4(%a1),-(%sp)
	mov.l		&0x3fff0000,-(%sp)# force exp to zero
	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16 mantissa (pushed last)
	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8 mantissa
	bra.b		A10_st
13939 
# sc_mul_err: reached from sc_mul when the summed exponent is negative.
# This is a branch-to-self, so execution never leaves this label.
# NOTE(review): the 10^ISCALE copy pushed by sc_mul is still on the stack
# here and no recovery path is visible -- confirm this case is unreachable.
sc_mul_err:
	bra.b		sc_mul_err
13942 
# A9_norm: scale a normalized input by multiplication.
# If A7 split off 10^24 (d2.w non-zero), multiply by 10^8 and 10^16 from
# the selected table first, then by fp1 = 10^abs(ISCALE).
A9_norm:
	tst.w		%d2		# test for small exp case
	beq.b		A9_con		# if zero, continue as normal
	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
A9_con:
	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
13950 
13951 # A10. Or in INEX.
13952 #      If INEX is set, round error occurred.  This is compensated
13953 #      for by 'or-ing' in the INEX2 flag to the lsb of Y.
13954 #
13955 # Register usage:
13956 #	Input/Output
13957 #	d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958 #	d2: x/x
13959 #	d3: x/x
13960 #	d4: LEN/Unchanged
13961 #	d5: ICTR:LAMBDA
13962 #	d6: ILOG/Unchanged
13963 #	d7: k-factor/Unchanged
13964 #	a0: ptr for original operand/final result
13965 #	a1: ptr to PTENxx array/Unchanged
13966 #	a2: x/ptr to FP_SCR1(a6)
13967 #	fp0: Y/Y with lsb adjusted
13968 #	fp1: 10^ISCALE/Unchanged
13969 #	fp2: x/x
13970 
# A10_st: fold the inexact indication from the A9 scaling into Y.
# Y is stored to FP_SCR1 and a2 is left pointing at it.  If INEX2 is set
# in the FPSR, the lsb of Y's mantissa is set and Y is reloaded into fp0.
A10_st:
	fmov.l		%fpsr,%d0	# get FPSR
	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
	btst		&9,%d0		# check if INEX2 set (FPSR bit 9)
	beq.b		A11_st		# if clear, skip rest
	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
13979 
13980 
13981 # A11. Restore original FPCR; set size ext.
13982 #      Perform FINT operation in the user's rounding mode.  Keep
13983 #      the size to extended.  The sintdo entry point in the sint
13984 #      routine expects the FPCR value to be in USER_FPCR for
13985 #      mode and precision.  The original FPCR is saved in L_SCR1.
13986 
# A11_st: save the user's FPCR image in L_SCR1, then reduce USER_FPCR to
# its rounding-mode bits only (mask 0x30).  A12 restores it afterwards.
A11_st:
	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
#					;block exceptions
13991 
13992 
# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
#      An inline fint.x is used (the call to the sintdo routine is
#      commented out).  The output is in fp0.
13995 #
13996 # Register usage:
13997 #	Input/Output
13998 #	d0: FPSR with AINEX cleared/FPCR with size set to ext
13999 #	d2: x/x/scratch
14000 #	d3: x/x
14001 #	d4: LEN/Unchanged
14002 #	d5: ICTR:LAMBDA/Unchanged
14003 #	d6: ILOG/Unchanged
14004 #	d7: k-factor/Unchanged
14005 #	a0: ptr for original operand/src ptr for sintdo
14006 #	a1: ptr to PTENxx array/Unchanged
14007 #	a2: ptr to FP_SCR1(a6)/Unchanged
14008 #	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009 #	fp0: Y/YINT
14010 #	fp1: 10^ISCALE/Unchanged
14011 #	fp2: x/x
14012 #	F_SCR1:x/x
14013 #	F_SCR2:Y adjusted for inex/Y with original exponent
14014 #	L_SCR1:x/original USER_FPCR
14015 #	L_SCR2:first word of X packed/Unchanged
14016 
# A12_st: YINT = FINT(Y) in the user's rounding mode.
#   - d0-d1/a0-a1, L_SCR1, L_SCR2 and USER_FPSR are pushed first and
#     popped again afterwards.
#   - Y is stored to FP_SCR1 and given the sign of the original operand.
#   - FPCR is loaded from USER_FPCR (reduced to rounding bits by A11),
#     FPSR is cleared, and fint.x leaves the result in fp0.
#   - The low word of the resulting FPSR is OR-ed into FPSR_EXCEPT.
#   - Only the first byte of the saved USER_FPSR is written back.
#   - Finally FP_SCR1's first longword is reloaded from L_SCR2 and
#     USER_FPCR is restored from L_SCR1.
A12_st:
	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
	mov.l	L_SCR1(%a6),-(%sp)	# save L_SCR1 (user's FPCR from A11)
	mov.l	L_SCR2(%a6),-(%sp)	# save L_SCR2 (first lword of X)

	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
	tst.l		L_SCR2(%a6)	# test sign of original operand
	bge.b		do_fint12		# if pos, use Y
	or.l		&0x80000000,(%a0)	# if neg, use -Y
do_fint12:
	mov.l	USER_FPSR(%a6),-(%sp)	# save USER_FPSR
#	bsr	sintdo		# sint routine returns int in fp0

	fmov.l	USER_FPCR(%a6),%fpcr	# user's rounding mode, ext precision
	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
##	andi.l		&0x00000030,%d0
##	fmov.l		%d0,%fpcr
	fint.x		FP_SCR1(%a6),%fp0	# do fint()
	fmov.l	%fpsr,%d0		# get status left by fint
	or.w	%d0,FPSR_EXCEPT(%a6)	# merge its low word into FPSR_EXCEPT
##	fmov.l		&0x0,%fpcr
##	fmov.l		%fpsr,%d0		# don't keep ccodes
##	or.w		%d0,FPSR_EXCEPT(%a6)

	mov.b	(%sp),USER_FPSR(%a6)	# restore first byte of USER_FPSR only
	add.l	&4,%sp			# drop the saved USER_FPSR

	mov.l	(%sp)+,L_SCR2(%a6)	# restore L_SCR2
	mov.l	(%sp)+,L_SCR1(%a6)	# restore L_SCR1
	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}

	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
14052 
14053 # A13. Check for LEN digits.
14054 #      If the int operation results in more than LEN digits,
14055 #      or less than LEN -1 digits, adjust ILOG and repeat from
14056 #      A6.  This test occurs only on the first pass.  If the
14057 #      result is exactly 10^LEN, decrement ILOG and divide
14058 #      the mantissa by 10.  The calculation of 10^LEN cannot
14059 #      be inexact, since all powers of ten up to 10^27 are exact
14060 #      in extended precision, so the use of a previous power-of-ten
14061 #      table will introduce no error.
14062 #
14063 #
14064 # Register usage:
14065 #	Input/Output
14066 #	d0: FPCR with size set to ext/scratch final = 0
14067 #	d2: x/x
14068 #	d3: x/scratch final = x
14069 #	d4: LEN/LEN adjusted
14070 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
14071 #	d6: ILOG/ILOG adjusted
14072 #	d7: k-factor/Unchanged
14073 #	a0: pointer into memory for packed bcd string formation
14074 #	a1: ptr to PTENxx array/Unchanged
14075 #	a2: ptr to FP_SCR1(a6)/Unchanged
14076 #	fp0: int portion of Y/abs(YINT) adjusted
14077 #	fp1: 10^ISCALE/Unchanged
14078 #	fp2: x/10^LEN
14079 #	F_SCR1:x/x
14080 #	F_SCR2:Y with original exponent/Unchanged
14081 #	L_SCR1:original USER_FPCR/Unchanged
14082 #	L_SCR2:first word of X packed/Unchanged
14083 
14084 A13_st:
14085 	swap		%d5		# put ICTR in lower word of d5
14086 	tst.w		%d5		# check if ICTR = 0
14087 	bne		not_zr		# if non-zero, go to second test
14088 #
14089 # Compute 10^(LEN-1)
14090 #
14091 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14092 	mov.l		%d4,%d0		# put LEN in d0
14093 	subq.l		&1,%d0		# d0 = LEN -1
14094 	clr.l		%d3		# clr table index
14095 l_loop:
14096 	lsr.l		&1,%d0		# shift next bit into carry
14097 	bcc.b		l_next		# if zero, skip the mul
14098 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14099 l_next:
14100 	add.l		&12,%d3		# inc d3 to next pwrten table entry
14101 	tst.l		%d0		# test if LEN is zero
14102 	bne.b		l_loop		# if not, loop
14103 #
14104 # 10^(LEN-1) is computed for this test and A14.  If the input was
14105 # denormalized, check only the case in which abs(YINT) >= 10^LEN.
14106 #
14107 	tst.b		BINDEC_FLG(%a6)	# check if input was norm
14108 	beq.b		A13_con		# if norm, continue with checking
14109 	fabs.x		%fp0		# take abs of YINT
14110 	bra		test_2
14111 #
14112 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14113 #
14114 A13_con:
14115 	fabs.x		%fp0		# take abs of YINT
14116 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
14117 	fbge.w		test_2		# if greater, do next test
14118 	subq.l		&1,%d6		# subtract 1 from ILOG
14119 	mov.w		&1,%d5		# set ICTR
14120 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14121 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14122 	bra.w		A6_str		# return to A6 and recompute YINT
14123 test_2:
14124 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14125 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
14126 	fblt.w		A14_st		# if less, all is ok, go to A14
14127 	fbgt.w		fix_ex		# if greater, fix and redo
14128 	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
14129 	addq.l		&1,%d6		# and inc ILOG
14130 	bra.b		A14_st		# and continue elsewhere
14131 fix_ex:
14132 	addq.l		&1,%d6		# increment ILOG by 1
14133 	mov.w		&1,%d5		# set ICTR
14134 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14135 	bra.w		A6_str		# return to A6 and recompute YINT
14136 #
14137 # Since ICTR <> 0, we have already been through one adjustment,
14138 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139 # 10^LEN is again computed using whatever table is in a1 since the
14140 # value calculated cannot be inexact.
14141 #
14142 not_zr:
14143 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14144 	mov.l		%d4,%d0		# put LEN in d0
14145 	clr.l		%d3		# clr table index
14146 z_loop:
14147 	lsr.l		&1,%d0		# shift next bit into carry
14148 	bcc.b		z_next		# if zero, skip the mul
14149 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14150 z_next:
14151 	add.l		&12,%d3		# inc d3 to next pwrten table entry
14152 	tst.l		%d0		# test if LEN is zero
14153 	bne.b		z_loop		# if not, loop
14154 	fabs.x		%fp0		# get abs(YINT)
14155 	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
14156 	fbneq.w		A14_st		# if not, skip this
14157 	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
14158 	addq.l		&1,%d6		# and inc ILOG by 1
14159 	addq.l		&1,%d4		# and inc LEN
14160 	fmul.s		FTEN(%pc),%fp2	# if LEN++, the get 10^^LEN
14161 
14162 # A14. Convert the mantissa to bcd.
14163 #      The binstr routine is used to convert the LEN digit
14164 #      mantissa to bcd in memory.  The input to binstr is
14165 #      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166 #      such that the decimal point is to the left of bit 63.
14167 #      The bcd digits are stored in the correct position in
14168 #      the final string area in memory.
14169 #
14170 #
14171 # Register usage:
14172 #	Input/Output
14173 #	d0: x/LEN call to binstr - final is 0
14174 #	d1: x/0
14175 #	d2: x/ms 32-bits of mant of abs(YINT)
14176 #	d3: x/ls 32-bits of mant of abs(YINT)
14177 #	d4: LEN/Unchanged
14178 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
14179 #	d6: ILOG
14180 #	d7: k-factor/Unchanged
14181 #	a0: pointer into memory for packed bcd string formation
14182 #	    /ptr to first mantissa byte in result string
14183 #	a1: ptr to PTENxx array/Unchanged
14184 #	a2: ptr to FP_SCR1(a6)/Unchanged
14185 #	fp0: int portion of Y/abs(YINT) adjusted
14186 #	fp1: 10^ISCALE/Unchanged
14187 #	fp2: 10^LEN/Unchanged
14188 #	F_SCR1:x/Work area for final result
14189 #	F_SCR2:Y with original exponent/Unchanged
14190 #	L_SCR1:original USER_FPCR/Unchanged
14191 #	L_SCR2:first word of X packed/Unchanged
14192 
14193 A14_st:
14194 	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
14195 	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
14196 	lea.l		FP_SCR0(%a6),%a0
14197 	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
14198 	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
14199 	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
14200 	clr.l		4(%a0)		# zero word 2 of FP_RES
14201 	clr.l		8(%a0)		# zero word 3 of FP_RES
14202 	mov.l		(%a0),%d0	# move exponent to d0
14203 	swap		%d0		# put exponent in lower word
14204 	beq.b		no_sft		# if zero, don't shift
14205 	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
14206 	tst.l		%d0		# check if > 1
14207 	bgt.b		no_sft		# if so, don't shift
14208 	neg.l		%d0		# make exp positive
14209 m_loop:
14210 	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
14211 	roxr.l		&1,%d3		# the number of places
14212 	dbf.w		%d0,m_loop	# given in d0
14213 no_sft:
14214 	tst.l		%d2		# check for mantissa of zero
14215 	bne.b		no_zr		# if not, go on
14216 	tst.l		%d3		# continue zero check
14217 	beq.b		zer_m		# if zero, go directly to binstr
14218 no_zr:
14219 	clr.l		%d1		# put zero in d1 for addx
14220 	add.l		&0x00000080,%d3	# inc at bit 7
14221 	addx.l		%d1,%d2		# continue inc
14222 	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14223 zer_m:
14224 	mov.l		%d4,%d0		# put LEN in d0 for binstr call
14225 	addq.l		&3,%a0		# a0 points to M16 byte in result
14226 	bsr		binstr		# call binstr to convert mant
14227 
14228 
14229 # A15. Convert the exponent to bcd.
14230 #      As in A14 above, the exp is converted to bcd and the
14231 #      digits are stored in the final string.
14232 #
14233 #      Digits are stored in L_SCR1(a6) on return from binstr as:
14234 #
14235 #	 31               16 15                0
14236 #	-----------------------------------------
14237 #	|  0 | e4 | e3 | e2 | e1 |  X |  X |  X |
14238 #	-----------------------------------------
14239 #
14240 # And are moved into their proper places in FP_SCR0.  If digit e4
14241 # is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14242 # written as specified in the 881/882 manual for packed decimal.
14243 #
14244 # Register usage:
14245 #	Input/Output
14246 #	d0: x/LEN call to binstr - final is 0
14247 #	d1: x/scratch (0);shift count for final exponent packing
14248 #	d2: x/ms 32-bits of exp fraction/scratch
14249 #	d3: x/ls 32-bits of exp fraction
14250 #	d4: LEN/Unchanged
14251 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
14252 #	d6: ILOG
14253 #	d7: k-factor/Unchanged
14254 #	a0: ptr to result string/ptr to L_SCR1(a6)
14255 #	a1: ptr to PTENxx array/Unchanged
14256 #	a2: ptr to FP_SCR1(a6)/Unchanged
14257 #	fp0: abs(YINT) adjusted/float(ILOG)
14258 #	fp1: 10^ISCALE/Unchanged
14259 #	fp2: 10^LEN/Unchanged
14260 #	F_SCR1:Work area for final result/BCD result
14261 #	F_SCR2:Y with original exponent/ILOG/10^4
14262 #	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263 #	L_SCR2:first word of X packed/Unchanged
14264 
14265 A15_st:
14266 	tst.b		BINDEC_FLG(%a6)	# check for denorm
14267 	beq.b		not_denorm
14268 	ftest.x		%fp0		# test for zero
14269 	fbeq.w		den_zero	# if zero, use k-factor or 4933
14270 	fmov.l		%d6,%fp0	# float ILOG
14271 	fabs.x		%fp0		# get abs of ILOG
14272 	bra.b		convrt
14273 den_zero:
14274 	tst.l		%d7		# check sign of the k-factor
14275 	blt.b		use_ilog	# if negative, use ILOG
14276 	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
14277 	bra.b		convrt		# do it
14278 use_ilog:
14279 	fmov.l		%d6,%fp0	# float ILOG
14280 	fabs.x		%fp0		# get abs of ILOG
14281 	bra.b		convrt
14282 not_denorm:
14283 	ftest.x		%fp0		# test for zero
14284 	fbneq.w		not_zero	# if zero, force exponent
14285 	fmov.s		FONE(%pc),%fp0	# force exponent to 1
14286 	bra.b		convrt		# do it
14287 not_zero:
14288 	fmov.l		%d6,%fp0	# float ILOG
14289 	fabs.x		%fp0		# get abs of ILOG
14290 convrt:
14291 	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
14292 	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
14293 	mov.l		4(%a2),%d2	# move word 2 to d2
14294 	mov.l		8(%a2),%d3	# move word 3 to d3
14295 	mov.w		(%a2),%d0	# move exp to d0
14296 	beq.b		x_loop_fin	# if zero, skip the shift
14297 	sub.w		&0x3ffd,%d0	# subtract off bias
14298 	neg.w		%d0		# make exp positive
14299 x_loop:
14300 	lsr.l		&1,%d2		# shift d2:d3 right
14301 	roxr.l		&1,%d3		# the number of places
14302 	dbf.w		%d0,x_loop	# given in d0
14303 x_loop_fin:
14304 	clr.l		%d1		# put zero in d1 for addx
14305 	add.l		&0x00000080,%d3	# inc at bit 6
14306 	addx.l		%d1,%d2		# continue inc
14307 	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14308 	mov.l		&4,%d0		# put 4 in d0 for binstr call
14309 	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
14310 	bsr		binstr		# call binstr to convert exp
14311 	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
14312 	mov.l		&12,%d1		# use d1 for shift count
14313 	lsr.l		%d1,%d0		# shift d0 right by 12
14314 	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
14315 	lsr.l		%d1,%d0		# shift d0 right by 12
14316 	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
14317 	tst.b		%d0		# check if e4 is zero
14318 	beq.b		A16_st		# if zero, skip rest
14319 	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
14320 
14321 
14322 # A16. Write sign bits to final string.
14323 #	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14324 #
14325 # Register usage:
14326 #	Input/Output
14327 #	d0: x/scratch - final is x
14328 #	d2: x/x
14329 #	d3: x/x
14330 #	d4: LEN/Unchanged
14331 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
14332 #	d6: ILOG/ILOG adjusted
14333 #	d7: k-factor/Unchanged
14334 #	a0: ptr to L_SCR1(a6)/Unchanged
14335 #	a1: ptr to PTENxx array/Unchanged
14336 #	a2: ptr to FP_SCR1(a6)/Unchanged
14337 #	fp0: float(ILOG)/Unchanged
14338 #	fp1: 10^ISCALE/Unchanged
14339 #	fp2: 10^LEN/Unchanged
14340 #	F_SCR1:BCD result with correct signs
14341 #	F_SCR2:ILOG/10^4
14342 #	L_SCR1:Exponent digits on return from binstr
14343 #	L_SCR2:first word of X packed/Unchanged
14344 
14345 A16_st:
14346 	clr.l		%d0		# clr d0 for collection of signs
14347 	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
14348 	tst.l		L_SCR2(%a6)	# check sign of original mantissa
14349 	bge.b		mant_p		# if pos, don't set SM
14350 	mov.l		&2,%d0		# move 2 in to d0 for SM
14351 mant_p:
14352 	tst.l		%d6		# check sign of ILOG
14353 	bge.b		wr_sgn		# if pos, don't set SE
14354 	addq.l		&1,%d0		# set bit 0 in d0 for SE
14355 wr_sgn:
14356 	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
14357 
14358 # Clean up and restore all registers used.
14359 
14360 	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
14361 	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
14362 	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
14363 	rts
14364 
# PTENRN: table of powers of ten, 10^(2^i) for i = 0..12, one entry per
# line.  Each entry is three longwords (12 bytes): the biased exponent in
# the upper word of the first long (lower word zero), followed by the
# 64-bit mantissa.  Entry i therefore sits at byte offset 12*i, which is
# how bindec steps through the table (add.l &12 per bit of the exponent).
# NOTE(review): presumably the round-to-nearest flavour of the three
# PTENRx tables; the code that picks a table is not in this part of the
# file -- confirm.
14365 	global		PTENRN
14366 PTENRN:
14367 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14368 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14369 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14370 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14371 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14372 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14373 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14374 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14375 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14376 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14377 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14378 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14379 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14380 
# PTENRP: same layout as PTENRN (13 entries of three longwords, 10^(2^i)
# at byte offset 12*i).  The values match PTENRN except that the low
# mantissa long of the 10^64 and 10^1024 entries is one unit larger.
# NOTE(review): presumably the round-toward-plus-infinity flavour; the
# table selection code is not in this part of the file -- confirm.
14381 	global		PTENRP
14382 PTENRP:
14383 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14384 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14385 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14386 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14387 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14388 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14389 	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64 (RN + 1 ulp)
14390 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14391 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14392 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14393 	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024 (RN + 1 ulp)
14394 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14395 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14396 
# PTENRM: same layout as PTENRN (13 entries of three longwords, 10^(2^i)
# at byte offset 12*i).  The values match PTENRN except that the low
# mantissa long of the 10^32, 10^128, 10^256, 10^512, 10^2048 and 10^4096
# entries is one unit smaller.
# NOTE(review): presumably the round-toward-minus-infinity flavour; the
# table selection code is not in this part of the file -- confirm.
14397 	global		PTENRM
14398 PTENRM:
14399 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14400 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14401 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14402 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14403 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14404 	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32 (RN - 1 ulp)
14405 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14406 	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128 (RN - 1 ulp)
14407 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256 (RN - 1 ulp)
14408 	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512 (RN - 1 ulp)
14409 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14410 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048 (RN - 1 ulp)
14411 	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096 (RN - 1 ulp)
14412 
14413 #########################################################################
14414 # binstr(): Converts a 64-bit binary fraction to packed bcd digits.	#
14415 #									#
14416 # INPUT *************************************************************** #
14417 #	d2:d3 = 64-bit binary value, treated as a fraction (see below)	#
14418 #	d0    = desired length (LEN) = number of bcd digits to form	#
14419 #	a0    = pointer to start in memory for bcd characters		#
14420 #		(This pointer must point to byte 4 of the first		#
14421 #		 lword of the packed decimal memory string.)		#
14422 #									#
14423 # OUTPUT ************************************************************** #
14424 #	a0 = advanced past the last bcd byte stored; d0-d7 are preserved	#
14425 #									#
14426 # ALGORITHM ***********************************************************	#
14427 #	The 64-bit binary is assumed to have a decimal point before	#
14428 #	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
14429 #	shift and a mul by 8 shift.  The bits shifted out of the	#
14430 #	msb form a decimal digit.  This process is iterated until	#
14431 #	LEN digits are formed.						#
14432 #									#
14433 # A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
14434 #     digit formed will be assumed the least significant.  This is	#
14435 #     to force the first byte formed to have a 0 in the upper 4 bits.	#
14436 #									#
14437 # A2. Beginning of the loop:						#
14438 #     Copy the fraction in d2:d3 to d4:d5.				#
14439 #									#
14440 # A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
14441 #     extracts and shifts.  The three msbs from d2 will go into d1.	#
14442 #									#
14443 # A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
14444 #     will be collected by the carry.					#
14445 #									#
14446 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
14447 #     into d2:d3.  D1 will contain the bcd digit formed.		#
14448 #									#
14449 # A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
14450 #     zero, it is the ls digit.  Put the digit in its place in the	#
14451 #     upper word of d7.  If it is the ls digit, write the byte		#
14452 #     from d7 to memory.						#
14453 #									#
14454 # A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
14455 #									#
14456 #########################################################################
14457 
14458 #	Implementation Notes:
14459 #
14460 #	The registers are used as follows:
14461 #
14462 #		d0: LEN counter
14463 #		d1: temp used to form the digit
14464 #		d2: upper 32-bits of fraction for mul by 8
14465 #		d3: lower 32-bits of fraction for mul by 8
14466 #		d4: upper 32-bits of fraction for mul by 2
14467 #		d5: lower 32-bits of fraction for mul by 2
14468 #		d6: temp for bit-field extracts
14469 #		d7: upper word = pending ms digit; lower word = digit count {0,1}
14470 #		a0: pointer into memory for packed bcd string formation
14471 #
14472 
14473 	global		binstr
14474 binstr:
14475 	movm.l		&0xff00,-(%sp)	# save {%d0-%d7}; a0 is deliberately not saved
14476 
14477 #
14478 # A1: Init d7
14479 #
14480 	mov.l		&1,%d7		# count = 1, pending digit = 0: first digit lands in a low nibble
14481 	subq.l		&1,%d0		# dbf runs count+1 passes, so use LEN-1
14482 #
14483 # A2. Copy d2:d3 to d4:d5.  Start loop.
14484 #
14485 loop:
14486 	mov.l		%d2,%d4		# copy the fraction before muls
14487 	mov.l		%d3,%d5		# to d4:d5
14488 #
14489 # A3. Multiply d2:d3 by 8; extract msbs into d1.
14490 #
14491 	bfextu		%d2{&0:&3},%d1	# d1 = 3 msbs of d2 (overflow of the mul by 8)
14492 	asl.l		&3,%d2		# shift d2 left by 3 places
14493 	bfextu		%d3{&0:&3},%d6	# d6 = 3 msbs of d3, to be carried into d2
14494 	asl.l		&3,%d3		# shift d3 left by 3 places
14495 	or.l		%d6,%d2		# or in msbs from d3 into d2
14496 #
14497 # A4. Multiply d4:d5 by 2; add carry out to d1.
14498 #
14499 	asl.l		&1,%d5		# mul d5 by 2
14500 	roxl.l		&1,%d4		# mul d4 by 2; msb shifted out lands in X
14501 	swap		%d6		# low word of d6 becomes 0 (msbs move to the upper word)
14502 	addx.w		%d6,%d1		# d1 += 0 + X: add in extend from mul by 2
14503 #
14504 # A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
14505 #
14506 	add.l		%d5,%d3		# add lower 32 bits
14507 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14508 	addx.l		%d4,%d2		# add with extend upper 32 bits
14509 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14510 	addx.w		%d6,%d1		# d1 += 0 + X: add in extend from the 64-bit add
14511 	swap		%d6		# undo the earlier swap of d6
14512 #
14513 # A6. Test d7 and branch.
14514 #
14515 	tst.w		%d7		# digit count: 0 = no digit pending, 1 = ms digit pending
14516 	beq.b		first_d		# nothing pending: hold this digit as the ms nibble
14517 sec_d:
14518 	swap		%d7		# bring first digit to word d7b
14519 	asl.w		&4,%d7		# first digit in upper 4 bits d7b
14520 	add.w		%d1,%d7		# add in ls digit to d7b
14521 	mov.b		%d7,(%a0)+	# store d7b byte in memory
14522 	swap		%d7		# bring the digit-count word back to the low word
14523 	clr.w		%d7		# set d7a to signal no digits done
14524 	dbf.w		%d0,loop	# do loop some more!
14525 	bra.b		end_bstr	# finished, so exit
14526 first_d:
14527 	swap		%d7		# put digit word in d7b
14528 	mov.w		%d1,%d7		# put new digit in d7b
14529 	swap		%d7		# bring the digit-count word back to the low word
14530 	addq.w		&1,%d7		# set d7a to signal first digit done
14531 	dbf.w		%d0,loop	# do loop some more!
14532 	swap		%d7		# LEN exhausted with a digit pending: flush it
14533 	lsl.w		&4,%d7		# move it to upper 4 bits (low nibble = 0)
14534 	mov.b		%d7,(%a0)+	# store it in memory string
14535 #
14536 # Clean up and return.  The digits are in memory; nothing is returned
14537 # in a register other than the advanced a0.
14538 end_bstr:
14539 	movm.l		(%sp)+,&0xff	# restore {%d0-%d7}
14540 	rts
14541 
14542 #########################################################################
14543 # XDEF ****************************************************************	#
14544 #	facc_in_b(): dmem_read_byte failed				#
14545 #	facc_in_w(): dmem_read_word failed				#
14546 #	facc_in_l(): dmem_read_long failed				#
14547 #	facc_in_d(): dmem_read of dbl prec failed			#
14548 #	facc_in_x(): dmem_read of ext prec failed			#
14549 #									#
14550 #	facc_out_b(): dmem_write_byte failed				#
14551 #	facc_out_w(): dmem_write_word failed				#
14552 #	facc_out_l(): dmem_write_long failed				#
14553 #	facc_out_d(): dmem_write of dbl prec failed			#
14554 #	facc_out_x(): dmem_write of ext prec failed			#
14555 #									#
14556 # XREF ****************************************************************	#
14557 #	_real_access() - exit through access error handler		#
14558 #									#
14559 # INPUT ***************************************************************	#
14560 #	None								#
14561 #									#
14562 # OUTPUT **************************************************************	#
14563 #	None								#
14564 #									#
14565 # ALGORITHM ***********************************************************	#
14566 #	Flow jumps here when an FP data fetch call gets an error	#
14567 # result. This means the operating system wants an access error frame	#
14568 # made out of the current exception stack frame.			#
14569 #	So, we first call restore() which makes sure that any updated	#
14570 # -(an)+ register gets returned to its pre-exception value and then	#
14571 # we change the stack to an access error stack frame.			#
14572 #									#
14573 #########################################################################
#
# Every stub follows the same three steps:
#   1. d0 = size in bytes of the failed access; restore() subtracts it
#      from (or, for predecrement, adds it back to) the affected An.
#   2. A 16-bit value is parked in EXC_VOFF(a6).  facc_finish later copies
#      the word at 0x6(sp) into the upper half of the FSLW longword.
#      NOTE(review): that this 0x6(sp) word is the same EXC_VOFF slot is
#      inferred from the "fix FSLW (size)" comment there -- confirm
#      against the frame equates.
#   3. Branch to facc_finish.
# restore() uses d1 as scratch; facc_finish reloads d0-d1/a0-a1 from
# EXC_DREGS, so the stubs do not need to preserve them.
# NOTE(review): the stored values look like MC68060 FSLW read/write and
# size encodings (0x01xx for reads, with the size in bits 6-5 of the
# word) -- confirm against the User's Manual.
14574 
14575 facc_in_b:
14576 	movq.l		&0x1,%d0			# d0 = 1: one byte
14577 	bsr.w		restore				# undo any (An)+/-(An) update
14578 
14579 	mov.w		&0x0121,EXC_VOFF(%a6)		# upper FSLW word for a byte read
14580 	bra.w		facc_finish			# word-sized branch; the later stubs use bra.b
14581 
14582 facc_in_w:
14583 	movq.l		&0x2,%d0			# d0 = 2: two bytes
14584 	bsr.w		restore				# undo any (An)+/-(An) update
14585 
14586 	mov.w		&0x0141,EXC_VOFF(%a6)		# upper FSLW word for a word read
14587 	bra.b		facc_finish
14588 
14589 facc_in_l:
14590 	movq.l		&0x4,%d0			# d0 = 4: four bytes
14591 	bsr.w		restore				# undo any (An)+/-(An) update
14592 
14593 	mov.w		&0x0101,EXC_VOFF(%a6)		# upper FSLW word for a long read
14594 	bra.b		facc_finish
14595 
14596 facc_in_d:
14597 	movq.l		&0x8,%d0			# d0 = 8: eight bytes
14598 	bsr.w		restore				# undo any (An)+/-(An) update
14599 
14600 	mov.w		&0x0161,EXC_VOFF(%a6)		# upper FSLW word for a double read
14601 	bra.b		facc_finish
14602 
14603 facc_in_x:
14604 	movq.l		&0xc,%d0			# d0 = 12: twelve bytes
14605 	bsr.w		restore				# undo any (An)+/-(An) update
14606 
14607 	mov.w		&0x0161,EXC_VOFF(%a6)		# same FSLW word as facc_in_d
14608 	bra.b		facc_finish
14609 
14610 ################################################################
# Write-side stubs (dmem_write_* failed).  Same pattern as the read-side
# stubs: d0 = access size in bytes for restore(), then a 16-bit value is
# parked in EXC_VOFF(a6) for facc_finish to merge into the FSLW.  The
# values here are 0x00xx where the read-side stubs use 0x01xx.
14611 
14612 facc_out_b:
14613 	movq.l		&0x1,%d0			# d0 = 1: one byte
14614 	bsr.w		restore				# undo any (An)+/-(An) update
14615 
14616 	mov.w		&0x00a1,EXC_VOFF(%a6)		# upper FSLW word for a byte write
14617 	bra.b		facc_finish
14618 
14619 facc_out_w:
14620 	movq.l		&0x2,%d0			# d0 = 2: two bytes
14621 	bsr.w		restore				# undo any (An)+/-(An) update
14622 
14623 	mov.w		&0x00c1,EXC_VOFF(%a6)		# upper FSLW word for a word write
14624 	bra.b		facc_finish
14625 
14626 facc_out_l:
14627 	movq.l		&0x4,%d0			# d0 = 4: four bytes
14628 	bsr.w		restore				# undo any (An)+/-(An) update
14629 
14630 	mov.w		&0x0081,EXC_VOFF(%a6)		# upper FSLW word for a long write
14631 	bra.b		facc_finish
14632 
14633 facc_out_d:
14634 	movq.l		&0x8,%d0			# d0 = 8: eight bytes
14635 	bsr.w		restore				# undo any (An)+/-(An) update
14636 
14637 	mov.w		&0x00e1,EXC_VOFF(%a6)		# upper FSLW word for a double write
14638 	bra.b		facc_finish
14639 
# facc_out_x(): dmem_write of an extended-precision operand failed.
#	d0 = 12 (operand size in bytes) is handed to restore(), which undoes
#	any (An)+/-(An) update; the value parked in EXC_VOFF(a6) is the same
#	one facc_out_d uses.  movq.l is used for the size, as in every other
#	facc_* stub: it produces the same d0 and condition codes as the
#	former mov.l with a shorter encoding.
14640 facc_out_x:
14641 	movq.l		&0xc,%d0			# d0 = 12: twelve bytes
14642 	bsr.w		restore				# undo any (An)+/-(An) update
14643 
14644 	mov.w		&0x00e1,EXC_VOFF(%a6)		# same FSLW word as facc_out_d
# no branch needed: execution falls through into facc_finish
14645 
14646 # here's where we actually create the access error frame from the
14647 # current exception stack frame.
#
# Steps:
#   1. Put the saved FPIAR into the frame's PC slot.
#   2. Reload fp0-fp1, the FP control registers and d0-d1/a0-a1 from the
#      save area, then drop the a6 frame.
#   3. Grow the stack frame by one longword and shuffle its fields so it
#      reads, from (sp):  SR | PC | voff | EA | FSLW
#   4. FSLW = (word parked by the facc_* stub) : 0x0001, plus bit 2 of the
#      byte at 0xd(sp) when the stacked SR shows supervisor mode.
#   5. Exit through _real_access.
14648 facc_finish:
14649 	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14650 
14651 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
14652 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14653 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
14654 
14655 	unlk		%a6
14656 
14657 	mov.l		(%sp),-(%sp)		# push a copy: frame grows by 4; SR, hi(PC) in place
14658 	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC) (old voff word rides along at 0x6)
14659 	mov.l		0xc(%sp),0x8(%sp)	# store EA
14660 	mov.l		&0x00000001,0xc(%sp)	# store FSLW: low word 0x0001, upper word filled next
14661 	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size): upper word = value parked by the stub
14662 	mov.w		&0x4008,0x6(%sp)	# store voff
14663 
14664 	btst		&0x5,(%sp)		# supervisor or user mode?
14665 	beq.b		facc_out2		# user
14666 	bset		&0x2,0xd(%sp)		# set supervisor TM bit
14667 
14668 facc_out2:
14669 	bra.l		_real_access		# hand the rebuilt frame to the access error handler
14670 
14671 ##################################################################
14672 
14673 # if the effective addressing mode was predecrement or postincrement,
14674 # the emulation has already changed its value to the correct post-
14675 # instruction value. but since we're exiting to the access error
14676 # handler, then AN must be returned to its pre-instruction value.
14677 # we do that here.
#
# restore():
#	In:    d0 = operand size in bytes (set by the facc_* stubs)
#	Uses:  d1 as scratch; a0 on the user-mode a7 path; d0 is negated on
#	       the predecrement path.
#	a0/a1 are fixed in their stacked copies (EXC_DREGS), a6 in the
#	longword at (a6), a2-a5 directly in the live registers.
14678 restore:
14679 	mov.b		EXC_OPWORD+0x1(%a6),%d1	# low byte of the opword
14680 	andi.b		&0x38,%d1		# extract the <ea> mode field
14681 	cmpi.b		%d1,&0x18		# postinc?
14682 	beq.w		rest_inc
14683 	cmpi.b		%d1,&0x20		# predec?
14684 	beq.w		rest_dec
14685 	rts					# any other mode: nothing to undo
14686 
14687 rest_inc:
14688 	mov.b		EXC_OPWORD+0x1(%a6),%d1	# low byte of the opword again
14689 	andi.w		&0x0007,%d1		# fetch An
14690 
14691 	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1	# d1 = 16-bit offset of the ri_aN handler
14692 	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)	# jump to tbl_rest_inc + offset
14693 
# offsets of the per-register handlers, relative to tbl_rest_inc
14694 tbl_rest_inc:
14695 	short		ri_a0 - tbl_rest_inc
14696 	short		ri_a1 - tbl_rest_inc
14697 	short		ri_a2 - tbl_rest_inc
14698 	short		ri_a3 - tbl_rest_inc
14699 	short		ri_a4 - tbl_rest_inc
14700 	short		ri_a5 - tbl_rest_inc
14701 	short		ri_a6 - tbl_rest_inc
14702 	short		ri_a7 - tbl_rest_inc
14703 
14704 ri_a0:
14705 	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
14706 	rts
14707 ri_a1:
14708 	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
14709 	rts
14710 ri_a2:
14711 	sub.l		%d0,%a2			# fix a2
14712 	rts
14713 ri_a3:
14714 	sub.l		%d0,%a3			# fix a3
14715 	rts
14716 ri_a4:
14717 	sub.l		%d0,%a4			# fix a4
14718 	rts
14719 ri_a5:
14720 	sub.l		%d0,%a5			# fix a5
14721 	rts
14722 ri_a6:
14723 	sub.l		%d0,(%a6)		# fix stacked a6
14724 	rts
14725 # if it's a fmove out instruction, we don't have to fix a7
14726 # because we hadn't changed it yet. if it's an opclass two
14727 # instruction (data moved in) and the exception was in supervisor
14728 # mode, then a7 also wasn't updated. if it was user mode, then
14729 # restore the correct a7 which is in the USP currently.
14730 ri_a7:
14731 	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
14732 	bne.b		ri_a7_done		# out
14733 
14734 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
14735 	bne.b		ri_a7_done		# supervisor
14736 	movc		%usp,%a0		# restore USP (a0 is scratch here)
14737 	sub.l		%d0,%a0
14738 	movc		%a0,%usp
14739 ri_a7_done:
14740 	rts
14741 
14742 # need to invert adjustment value if the <ea> was predec
14743 rest_dec:
14744 	neg.l		%d0			# the sub.l in ri_aN then adds the size back
14745 	bra.b		rest_inc
14746