162306a36Sopenharmony_ci~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
262306a36Sopenharmony_ciMOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
362306a36Sopenharmony_ciM68000 Hi-Performance Microprocessor Division
462306a36Sopenharmony_ciM68060 Software Package
562306a36Sopenharmony_ciProduction Release P1.00 -- October 10, 1994
662306a36Sopenharmony_ci
762306a36Sopenharmony_ciM68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
862306a36Sopenharmony_ci
962306a36Sopenharmony_ciTHE SOFTWARE is provided on an "AS IS" basis and without warranty.
1062306a36Sopenharmony_ciTo the maximum extent permitted by applicable law,
1162306a36Sopenharmony_ciMOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
1262306a36Sopenharmony_ciINCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
1362306a36Sopenharmony_ciand any warranty against infringement with regard to the SOFTWARE
1462306a36Sopenharmony_ci(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ciTo the maximum extent permitted by applicable law,
1762306a36Sopenharmony_ciIN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
1862306a36Sopenharmony_ci(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
1962306a36Sopenharmony_ciBUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
2062306a36Sopenharmony_ciARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
2162306a36Sopenharmony_ciMotorola assumes no responsibility for the maintenance and support of the SOFTWARE.
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ciYou are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
2462306a36Sopenharmony_ciso long as this entire notice is retained without alteration in any modified and/or
2562306a36Sopenharmony_ciredistributed versions, and that such modified versions are clearly identified as such.
2662306a36Sopenharmony_ciNo licenses are granted by implication, estoppel or otherwise under any patents
2762306a36Sopenharmony_cior trademarks of Motorola, Inc.
2862306a36Sopenharmony_ci~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2962306a36Sopenharmony_ci# litop.s:
3062306a36Sopenharmony_ci#	This file is appended to the top of the 060FPLSP package
3162306a36Sopenharmony_ci# and contains the entry points into the package. The user, in
3262306a36Sopenharmony_ci# effect, branches to one of the branch table entries located here.
3362306a36Sopenharmony_ci#
3462306a36Sopenharmony_ci
# Entry-point branch table for the package. Each entry is a long branch
# to the named routine followed by one zero word of padding, so callers
# enter the package by branching to a fixed slot in this table.

# 64-bit integer divide entry points: signed, then unsigned.
3562306a36Sopenharmony_ci	bra.l	_060LSP__idivs64_
3662306a36Sopenharmony_ci	short	0x0000
3762306a36Sopenharmony_ci	bra.l	_060LSP__idivu64_
3862306a36Sopenharmony_ci	short	0x0000
3962306a36Sopenharmony_ci
# 64-bit integer multiply entry points: signed, then unsigned.
4062306a36Sopenharmony_ci	bra.l	_060LSP__imuls64_
4162306a36Sopenharmony_ci	short	0x0000
4262306a36Sopenharmony_ci	bra.l	_060LSP__imulu64_
4362306a36Sopenharmony_ci	short	0x0000
4462306a36Sopenharmony_ci
# cmp2 entry points. NOTE(review): the A/D and b/w/l suffixes presumably
# select address-/data-register variants in byte, word and long sizes;
# confirm against the headers of the routines themselves.
4562306a36Sopenharmony_ci	bra.l	_060LSP__cmp2_Ab_
4662306a36Sopenharmony_ci	short	0x0000
4762306a36Sopenharmony_ci	bra.l	_060LSP__cmp2_Aw_
4862306a36Sopenharmony_ci	short	0x0000
4962306a36Sopenharmony_ci	bra.l	_060LSP__cmp2_Al_
5062306a36Sopenharmony_ci	short	0x0000
5162306a36Sopenharmony_ci	bra.l	_060LSP__cmp2_Db_
5262306a36Sopenharmony_ci	short	0x0000
5362306a36Sopenharmony_ci	bra.l	_060LSP__cmp2_Dw_
5462306a36Sopenharmony_ci	short	0x0000
5562306a36Sopenharmony_ci	bra.l	_060LSP__cmp2_Dl_
5662306a36Sopenharmony_ci	short	0x0000
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci# leave room for future possible additions.
# (the alignment below pads the table so that unused slots remain
# available after the last entry.)
5962306a36Sopenharmony_ci	align	0x200
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci#########################################################################
6262306a36Sopenharmony_ci# XDEF ****************************************************************	#
6362306a36Sopenharmony_ci#	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
6462306a36Sopenharmony_ci#	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
6562306a36Sopenharmony_ci#									#
6662306a36Sopenharmony_ci#	This is the library version which is accessed as a subroutine	#
6762306a36Sopenharmony_ci#	and therefore does not work exactly like the 680X0 div{s,u}.l	#
6862306a36Sopenharmony_ci#	64-bit divide instruction.					#
6962306a36Sopenharmony_ci#									#
7062306a36Sopenharmony_ci# XREF ****************************************************************	#
7162306a36Sopenharmony_ci#	None.								#
7262306a36Sopenharmony_ci#									#
7362306a36Sopenharmony_ci# INPUT ***************************************************************	#
7462306a36Sopenharmony_ci#	0x4(sp)  = divisor						#
7562306a36Sopenharmony_ci#	0x8(sp)  = hi(dividend)						#
7662306a36Sopenharmony_ci#	0xc(sp)  = lo(dividend)						#
7762306a36Sopenharmony_ci#	0x10(sp) = pointer to location to place quotient/remainder	#
7862306a36Sopenharmony_ci#									#
7962306a36Sopenharmony_ci# OUTPUT **************************************************************	#
8062306a36Sopenharmony_ci#	0x10(sp) = points to location of remainder/quotient.		#
8162306a36Sopenharmony_ci#		   remainder is in first longword, quotient is in 2nd.	#
8262306a36Sopenharmony_ci#									#
8362306a36Sopenharmony_ci# ALGORITHM ***********************************************************	#
8462306a36Sopenharmony_ci#	If the operands are signed, make them unsigned and save the	#
8562306a36Sopenharmony_ci# sign info for later. Separate out special cases like divide-by-zero	#
8662306a36Sopenharmony_ci# or 32-bit divides if possible. Else, use a special math algorithm	#
8762306a36Sopenharmony_ci# to calculate the result.						#
8862306a36Sopenharmony_ci#	Restore sign info if signed instruction. Set the condition	#
8962306a36Sopenharmony_ci# codes before performing the final "rts". If the divisor was equal to	#
9062306a36Sopenharmony_ci# zero, then perform a divide-by-zero using a 16-bit implemented	#
9162306a36Sopenharmony_ci# divide instruction. This way, the operating system can record that	#
9262306a36Sopenharmony_ci# the event occurred even though it may not point to the correct place.	#
9362306a36Sopenharmony_ci#									#
9462306a36Sopenharmony_ci#########################################################################
9562306a36Sopenharmony_ci
# Offsets, relative to %a6, of the local variables in the 16-byte frame
# that both divide entry points create with "link.w %a6,&-16".

# POSNEG (byte): set (st) for a signed divide, cleared (sf) for unsigned.
9662306a36Sopenharmony_ciset	POSNEG,		-1
# NDIVISOR (byte): set when the signed divisor was negative.
9762306a36Sopenharmony_ciset	NDIVISOR,	-2
# NDIVIDEND (byte): set when the signed dividend was negative; later
# overwritten with (NDIVISOR eor NDIVIDEND) to test the quotient sign.
9862306a36Sopenharmony_ciset	NDIVIDEND,	-3
# DDSECOND (byte): flag telling the Knuth loop that the first quotient
# word is done and the second one is being computed.
9962306a36Sopenharmony_ciset	DDSECOND,	-4
# DDNORMAL (long): number of left shifts applied to normalize the divisor.
10062306a36Sopenharmony_ciset	DDNORMAL,	-8
# DDQUOTIENT (long): the two 16-bit quotient words, first word at
# DDQUOTIENT, second at DDQUOTIENT+2.
10162306a36Sopenharmony_ciset	DDQUOTIENT,	-12
# DIV64_CC (word): condition codes saved on entry and edited before return.
10262306a36Sopenharmony_ciset	DIV64_CC,	-16
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci##########
10562306a36Sopenharmony_ci# divs.l #
10662306a36Sopenharmony_ci##########
# Signed divide entry point: build the stack frame, save the caller's
# condition codes, mark the operation as signed and join the code shared
# with the unsigned entry point at ldiv64_cont.
10762306a36Sopenharmony_ci	global		_060LSP__idivs64_
10862306a36Sopenharmony_ci_060LSP__idivs64_:
10962306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
11062306a36Sopenharmony_ci	link.w		%a6,&-16
11162306a36Sopenharmony_ci	movm.l		&0x3f00,-(%sp)		# save d2-d7
11262306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
11362306a36Sopenharmony_ci# PROLOGUE END ##########################################################
11462306a36Sopenharmony_ci
# save incoming ccodes; they are edited and reloaded before returning.
11562306a36Sopenharmony_ci	mov.w		%cc,DIV64_CC(%a6)
11662306a36Sopenharmony_ci	st		POSNEG(%a6)		# signed operation
11762306a36Sopenharmony_ci	bra.b		ldiv64_cont
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci##########
12062306a36Sopenharmony_ci# divu.l #
12162306a36Sopenharmony_ci##########
# Unsigned divide entry point: build the stack frame, save the caller's
# condition codes, mark the operation as unsigned and fall through into
# the shared code at ldiv64_cont.
12262306a36Sopenharmony_ci	global		_060LSP__idivu64_
12362306a36Sopenharmony_ci_060LSP__idivu64_:
12462306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
12562306a36Sopenharmony_ci	link.w		%a6,&-16
12662306a36Sopenharmony_ci	movm.l		&0x3f00,-(%sp)		# save d2-d7
12762306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
12862306a36Sopenharmony_ci# PROLOGUE END ##########################################################
12962306a36Sopenharmony_ci
# save incoming ccodes; they are edited and reloaded before returning.
13062306a36Sopenharmony_ci	mov.w		%cc,DIV64_CC(%a6)
13162306a36Sopenharmony_ci	sf		POSNEG(%a6)		# unsigned operation
13262306a36Sopenharmony_ci
# Shared body of the signed and unsigned 64-bit divides.
# Register use from here on:
#	%d7    = divisor (made unsigned for a signed divide)
#	%d5:%d6 = dividend hi:lo on the way in,
#	          remainder (%d5) and quotient (%d6) on the way out
# Stack arguments are addressed through %a6, i.e. 4 bytes higher than
# the sp-relative offsets listed in the routine header.
# NOTE(review): the existing comments on the cmp/cmpi instructions imply
# that this assembler syntax compares (first operand - second operand),
# which matches "cmpi.l %d6, &0x80000000" placing the immediate second.
13362306a36Sopenharmony_cildiv64_cont:
13462306a36Sopenharmony_ci	mov.l		0x8(%a6),%d7		# fetch divisor
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	beq.w		ldiv64eq0		# divisor is = 0!!!
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	mov.l		0xc(%a6), %d5		# get dividend hi
13962306a36Sopenharmony_ci	mov.l		0x10(%a6), %d6		# get dividend lo
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci# separate signed and unsigned divide
14262306a36Sopenharmony_ci	tst.b		POSNEG(%a6)		# signed or unsigned?
14362306a36Sopenharmony_ci	beq.b		ldspecialcases		# use positive divide
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci# save the sign of the divisor
14662306a36Sopenharmony_ci# make divisor unsigned if it's negative
14762306a36Sopenharmony_ci	tst.l		%d7			# chk sign of divisor
14862306a36Sopenharmony_ci	slt		NDIVISOR(%a6)		# save sign of divisor
14962306a36Sopenharmony_ci	bpl.b		ldsgndividend
15062306a36Sopenharmony_ci	neg.l		%d7			# complement negative divisor
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci# save the sign of the dividend
15362306a36Sopenharmony_ci# make dividend unsigned if it's negative
15462306a36Sopenharmony_cildsgndividend:
15562306a36Sopenharmony_ci	tst.l		%d5			# chk sign of hi(dividend)
15662306a36Sopenharmony_ci	slt		NDIVIDEND(%a6)		# save sign of dividend
15762306a36Sopenharmony_ci	bpl.b		ldspecialcases
15862306a36Sopenharmony_ci
# 64-bit negate: the two negx.l instructions propagate the borrow from
# the low longword into the high one, so 'X' must start out clear.
15962306a36Sopenharmony_ci	mov.w		&0x0, %cc		# clear 'X' cc bit
16062306a36Sopenharmony_ci	negx.l		%d6			# complement signed dividend
16162306a36Sopenharmony_ci	negx.l		%d5
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci# extract some special cases:
16462306a36Sopenharmony_ci#	- is (dividend == 0) ?
16562306a36Sopenharmony_ci#	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
16662306a36Sopenharmony_cildspecialcases:
16762306a36Sopenharmony_ci	tst.l		%d5			# is (hi(dividend) == 0)
16862306a36Sopenharmony_ci	bne.b		ldnormaldivide		# no, so try it the long way
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	tst.l		%d6			# is (lo(dividend) == 0), too
17162306a36Sopenharmony_ci	beq.w		lddone			# yes, so (dividend == 0)
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	cmp.l		%d7,%d6			# is (divisor <= lo(dividend))
17462306a36Sopenharmony_ci	bls.b		ld32bitdivide		# yes, so use 32 bit divide
17562306a36Sopenharmony_ci
# dividend < divisor: %d5 is known to be zero here, so the exchange
# leaves the quotient (%d6) = 0 and the remainder (%d5) = lo(dividend).
17662306a36Sopenharmony_ci	exg		%d5,%d6			# q = 0, r = dividend
17762306a36Sopenharmony_ci	bra.w		ldivfinish		# can't divide, we're done.
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_cild32bitdivide:
# the following code treats %d5 as the remainder and %d6 as the quotient.
18062306a36Sopenharmony_ci	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	bra.b		ldivfinish
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cildnormaldivide:
18562306a36Sopenharmony_ci# last special case:
18662306a36Sopenharmony_ci#	- is hi(dividend) >= divisor ? if yes, then overflow
18762306a36Sopenharmony_ci	cmp.l		%d7,%d5
18862306a36Sopenharmony_ci	bls.b		lddovf			# answer won't fit in 32 bits
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci# perform the divide algorithm:
19162306a36Sopenharmony_ci	bsr.l		ldclassical		# do int divide
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci# separate into signed and unsigned finishes.
19462306a36Sopenharmony_cildivfinish:
19562306a36Sopenharmony_ci	tst.b		POSNEG(%a6)		# do divs, divu separately
19662306a36Sopenharmony_ci	beq.b		lddone			# divu has no processing!!!
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci# it was a divs.l, so ccode setting is a little more complicated...
19962306a36Sopenharmony_ci	tst.b		NDIVIDEND(%a6)		# remainder has same sign
20062306a36Sopenharmony_ci	beq.b		ldcc			# as dividend.
20162306a36Sopenharmony_ci	neg.l		%d5			# sgn(rem) = sgn(dividend)
20262306a36Sopenharmony_cildcc:
# the eor leaves a non-zero byte only when the two operand signs differ,
# i.e. when the quotient must be negative. this overwrites the NDIVIDEND
# slot and uses %d0 as scratch.
20362306a36Sopenharmony_ci	mov.b		NDIVISOR(%a6), %d0
20462306a36Sopenharmony_ci	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
20562306a36Sopenharmony_ci	beq.b		ldqpos			# branch to quot positive
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci# 0x80000000 is the largest number representable as a 32-bit negative
20862306a36Sopenharmony_ci# number. the negative of 0x80000000 is 0x80000000.
20962306a36Sopenharmony_ci	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
21062306a36Sopenharmony_ci	bhi.b		lddovf
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	neg.l		%d6			# make (-quot) 2's comp
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	bra.b		lddone
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_cildqpos:
21762306a36Sopenharmony_ci	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
21862306a36Sopenharmony_ci	bne.b		lddovf
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cilddone:
22162306a36Sopenharmony_ci# rebuild the condition codes for a successful divide: keep only bit 4
22262306a36Sopenharmony_ci# ('X') of the saved ccodes, then let tst.l set them from the quotient.
22362306a36Sopenharmony_ci	andi.w		&0x10,DIV64_CC(%a6)
22462306a36Sopenharmony_ci	mov.w		DIV64_CC(%a6),%cc
22562306a36Sopenharmony_ci	tst.l		%d6			# may set 'N' ccode bit
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci# here, the remainder is in %d5 and the quotient is in %d6. store them
22862306a36Sopenharmony_ci# (remainder first, quotient second) through the pointer at 0x14(%a6).
22962306a36Sopenharmony_ci# use movm here to not disturb the condition codes.
23062306a36Sopenharmony_cildexit:
23162306a36Sopenharmony_ci	movm.l		&0x0060,([0x14,%a6])	# save result
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci# EPILOGUE BEGIN ########################################################
23462306a36Sopenharmony_ci#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
23562306a36Sopenharmony_ci	movm.l		(%sp)+,&0x00fc		# restore d2-d7
23662306a36Sopenharmony_ci	unlk		%a6
23762306a36Sopenharmony_ci# EPILOGUE END ##########################################################
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	rts
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci# the result should be the unchanged dividend
# Overflow exit: the quotient does not fit in 32 bits. Reload the
# original dividend from the stack arguments so that it is what gets
# stored at the result location, flag the overflow in the condition
# codes and leave through the common exit at ldexit.
24262306a36Sopenharmony_cilddovf:
24362306a36Sopenharmony_ci	mov.l		0xc(%a6), %d5		# get dividend hi
24462306a36Sopenharmony_ci	mov.l		0x10(%a6), %d6		# get dividend lo
24562306a36Sopenharmony_ci
# keep bits 2-4 of the saved ccodes, clear bits 0-1, then set 'V'.
24662306a36Sopenharmony_ci	andi.w		&0x1c,DIV64_CC(%a6)
24762306a36Sopenharmony_ci	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
24862306a36Sopenharmony_ci	mov.w		DIV64_CC(%a6),%cc
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	bra.b		ldexit
25162306a36Sopenharmony_ci
# Divide-by-zero exit: copy the unchanged dividend (hi, then lo) to the
# result location, restore the caller's condition codes and registers,
# tear down the frame and then execute a real divide by zero so that the
# processor raises the exception (see the ALGORITHM notes in the routine
# header).
25262306a36Sopenharmony_cildiv64eq0:
25362306a36Sopenharmony_ci	mov.l		0xc(%a6),([0x14,%a6])
25462306a36Sopenharmony_ci	mov.l		0x10(%a6),([0x14,%a6],0x4)
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	mov.w		DIV64_CC(%a6),%cc
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci# EPILOGUE BEGIN ########################################################
25962306a36Sopenharmony_ci#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
26062306a36Sopenharmony_ci	movm.l		(%sp)+,&0x00fc		# restore d2-d7
26162306a36Sopenharmony_ci	unlk		%a6
26262306a36Sopenharmony_ci# EPILOGUE END ##########################################################
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	divu.w		&0x0,%d0		# force a divbyzero exception
# NOTE(review): presumably reached only if the exception handler returns
# to the instruction after the divide -- confirm.
26562306a36Sopenharmony_ci	rts
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci###########################################################################
26862306a36Sopenharmony_ci#########################################################################
26962306a36Sopenharmony_ci# This routine uses the 'classical' Algorithm D from Donald Knuth's	#
27062306a36Sopenharmony_ci# Art of Computer Programming, vol II, Seminumerical Algorithms.	#
27162306a36Sopenharmony_ci# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
27262306a36Sopenharmony_ci# where U,V are words of the quadword dividend and longword divisor,	#
27362306a36Sopenharmony_ci# and U1, V1 are the most significant words.				#
27462306a36Sopenharmony_ci#									#
27562306a36Sopenharmony_ci# The most sig. longword of the 64 bit dividend must be in %d5, least	#
27662306a36Sopenharmony_ci# in %d6. The divisor must be in %d7 and, like the dividend, must	#
27762306a36Sopenharmony_ci# already be unsigned (the caller strips operand signs beforehand).	#
27862306a36Sopenharmony_ci# The quotient is returned in %d6, remainder in %d5, unless the		#
27962306a36Sopenharmony_ci# v (overflow) bit is set in the saved %ccr. If overflow, the dividend	#
28062306a36Sopenharmony_ci# is unchanged.								#
28162306a36Sopenharmony_ci#########################################################################
# Unsigned 64/32 divide core, called with bsr from the divide body.
#	in:  %d5:%d6 = dividend hi:lo, %d7 = divisor
#	out: %d5 = remainder, %d6 = quotient
# This first part handles a divisor that fits in 16 bits and uses %d1 as
# scratch; larger divisors branch to lddknuth.
28262306a36Sopenharmony_cildclassical:
28362306a36Sopenharmony_ci# if the divisor msw is 0, use simpler algorithm than the full blown
28462306a36Sopenharmony_ci# one at lddknuth:
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	cmpi.l		%d7, &0xffff
28762306a36Sopenharmony_ci	bhi.b		lddknuth		# go use D. Knuth algorithm
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci# Since the divisor is only a word (and larger than the mslw of the dividend),
29062306a36Sopenharmony_ci# a simpler algorithm may be used :
29162306a36Sopenharmony_ci# In the general case, four quotient words would be created by
29262306a36Sopenharmony_ci# dividing the divisor word into each dividend word. In this case,
29362306a36Sopenharmony_ci# the first two quotient words must be zero, or overflow would occur.
29462306a36Sopenharmony_ci# Since we already checked this case above, we can treat the most significant
29562306a36Sopenharmony_ci# longword of the dividend as (0) remainder (see Knuth) and merely complete
29662306a36Sopenharmony_ci# the last two divisions to get a quotient longword and word remainder:
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	clr.l		%d1
29962306a36Sopenharmony_ci	swap		%d5			# same as r*b if previous step rqd
30062306a36Sopenharmony_ci	swap		%d6			# get u3 to lsw position
30162306a36Sopenharmony_ci	mov.w		%d6, %d5		# rb + u3
30262306a36Sopenharmony_ci
# first 32/16 divide: leaves remainder in the upper word of %d5 and the
# quotient word in the lower word.
30362306a36Sopenharmony_ci	divu.w		%d7, %d5
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	mov.w		%d5, %d1		# first quotient word
30662306a36Sopenharmony_ci	swap		%d6			# get u4
30762306a36Sopenharmony_ci	mov.w		%d6, %d5		# rb + u4
30862306a36Sopenharmony_ci
# second 32/16 divide on (previous remainder : u4).
30962306a36Sopenharmony_ci	divu.w		%d7, %d5
31062306a36Sopenharmony_ci
# assemble the 32-bit quotient in %d1 (first word high, second word low)
# and isolate the final remainder in %d5.
31162306a36Sopenharmony_ci	swap		%d1
31262306a36Sopenharmony_ci	mov.w		%d5, %d1		# 2nd quotient 'digit'
31362306a36Sopenharmony_ci	clr.w		%d5
31462306a36Sopenharmony_ci	swap		%d5			# now remainder
31562306a36Sopenharmony_ci	mov.l		%d1, %d6		# and quotient
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci	rts
31862306a36Sopenharmony_ci
# Full divide for a divisor wider than 16 bits (reached from ldclassical).
#	in:  %d5:%d6 = dividend hi:lo, %d7 = divisor
#	out: %d5 = remainder, %d6 = quotient
# Working registers: %d1 = trial quotient word, %d0/%d2/%d3/%d4 = scratch.
# Frame locals used: DDNORMAL, DDSECOND, DDQUOTIENT. The main loop body
# (from lddnormalized) runs twice, once per 16-bit quotient word.
31962306a36Sopenharmony_cilddknuth:
32062306a36Sopenharmony_ci# In this algorithm, the divisor is treated as a 2 digit (word) number
32162306a36Sopenharmony_ci# which is divided into a 3 digit (word) dividend to get one quotient
32262306a36Sopenharmony_ci# digit (word). After subtraction, the dividend is shifted and the
32362306a36Sopenharmony_ci# process repeated. Before beginning, the divisor and quotient are
32462306a36Sopenharmony_ci# 'normalized' so that the process of estimating the quotient digit
32562306a36Sopenharmony_ci# will yield verifiably correct results.
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
32862306a36Sopenharmony_ci	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
32962306a36Sopenharmony_ci	clr.l		%d1			# %d1 will hold trial quotient
# normalization loop: shift divisor and 64-bit dividend left together
# until the top bit of the divisor is set.
33062306a36Sopenharmony_cilddnchk:
33162306a36Sopenharmony_ci	btst		&31, %d7		# must we normalize? first word of
33262306a36Sopenharmony_ci	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
33362306a36Sopenharmony_ci	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
33462306a36Sopenharmony_ci	lsl.l		&0x1, %d7		# shift the divisor
33562306a36Sopenharmony_ci	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
33662306a36Sopenharmony_ci	roxl.l		&0x1, %d5		# shift u1,u2
33762306a36Sopenharmony_ci	bra.w		lddnchk
33862306a36Sopenharmony_cilddnormalized:
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci# Now calculate an estimate of the quotient words (msw first, then lsw).
34162306a36Sopenharmony_ci# The comments use subscripts for the first quotient digit determination.
34262306a36Sopenharmony_ci	mov.l		%d7, %d3		# divisor
34362306a36Sopenharmony_ci	mov.l		%d5, %d2		# dividend mslw
34462306a36Sopenharmony_ci	swap		%d2
34562306a36Sopenharmony_ci	swap		%d3
34662306a36Sopenharmony_ci	cmp.w		%d2, %d3		# V1 = U1 ?
34762306a36Sopenharmony_ci	bne.b		lddqcalc1
34862306a36Sopenharmony_ci	mov.w		&0xffff, %d1		# use max trial quotient word
34962306a36Sopenharmony_ci	bra.b		lddadj0
35062306a36Sopenharmony_cilddqcalc1:
35162306a36Sopenharmony_ci	mov.l		%d5, %d1
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	divu.w		%d3, %d1		# use quotient of mslw/msw
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	andi.l		&0x0000ffff, %d1	# zero any remainder
35662306a36Sopenharmony_cilddadj0:
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci# now test the trial quotient and adjust. This step plus the
35962306a36Sopenharmony_ci# normalization assures (according to Knuth) that the trial
36062306a36Sopenharmony_ci# quotient will be at worst 1 too large.
# the low dividend longword is parked on the stack while %d6 is reused
# to hold word U3; it is popped again after the ldmm2 call below.
36162306a36Sopenharmony_ci	mov.l		%d6, -(%sp)
36262306a36Sopenharmony_ci	clr.w		%d6			# word u3 left
36362306a36Sopenharmony_ci	swap		%d6			# in lsw position
36462306a36Sopenharmony_cilddadj1: mov.l		%d7, %d3
36562306a36Sopenharmony_ci	mov.l		%d1, %d2
36662306a36Sopenharmony_ci	mulu.w		%d7, %d2		# V2q
36762306a36Sopenharmony_ci	swap		%d3
36862306a36Sopenharmony_ci	mulu.w		%d1, %d3		# V1q
36962306a36Sopenharmony_ci	mov.l		%d5, %d4		# U1U2
37062306a36Sopenharmony_ci	sub.l		%d3, %d4		# U1U2 - V1q
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	swap		%d4
37362306a36Sopenharmony_ci
# %d0 receives the upper word of (U1U2 - V1q); %d4 becomes
# (lower word of (U1U2 - V1q)) : U3.
37462306a36Sopenharmony_ci	mov.w		%d4,%d0
37562306a36Sopenharmony_ci	mov.w		%d6,%d4			# insert lower word (U3)
37662306a36Sopenharmony_ci
# a non-zero upper word skips the comparison below and accepts the
# current trial quotient.
37762306a36Sopenharmony_ci	tst.w		%d0			# is upper word set?
37862306a36Sopenharmony_ci	bne.w		lddadjd1
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci#	add.l		%d6, %d4		# (U1U2 - V1q) + U3
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	cmp.l		%d2, %d4
38362306a36Sopenharmony_ci	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
38462306a36Sopenharmony_ci	subq.l		&0x1, %d1		# yes, decrement and recheck
38562306a36Sopenharmony_ci	bra.b		lddadj1
38662306a36Sopenharmony_cilddadjd1:
38762306a36Sopenharmony_ci# now test the word by multiplying it by the divisor (V1V2) and comparing
38862306a36Sopenharmony_ci# the 3 digit (word) result with the current dividend words
38962306a36Sopenharmony_ci	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
39062306a36Sopenharmony_ci	mov.l		%d1, %d6
39162306a36Sopenharmony_ci	swap		%d6			# shift answer to ms 3 words
39262306a36Sopenharmony_ci	mov.l		%d7, %d5
# ldmm2 multiplies %d5 by %d6 and returns the 64-bit product in %d5:%d6.
39362306a36Sopenharmony_ci	bsr.l		ldmm2
39462306a36Sopenharmony_ci	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
39562306a36Sopenharmony_ci	mov.l		%d6, %d3
39662306a36Sopenharmony_ci	mov.l		(%sp)+, %d5		# restore dividend
39762306a36Sopenharmony_ci	mov.l		(%sp)+, %d6
39862306a36Sopenharmony_ci	sub.l		%d3, %d6
39962306a36Sopenharmony_ci	subx.l		%d2, %d5		# subtract double precision
40062306a36Sopenharmony_ci	bcc		ldd2nd			# no carry, do next quotient digit
40162306a36Sopenharmony_ci	subq.l		&0x1, %d1		# q is one too large
40262306a36Sopenharmony_ci# need to add back divisor longword to current ms 3 digits of dividend
40362306a36Sopenharmony_ci# - according to Knuth, this is done only 2 out of 65536 times for random
40462306a36Sopenharmony_ci# divisor, dividend selection.
40562306a36Sopenharmony_ci	clr.l		%d2
40662306a36Sopenharmony_ci	mov.l		%d7, %d3
40762306a36Sopenharmony_ci	swap		%d3
40862306a36Sopenharmony_ci	clr.w		%d3			# %d3 now ls word of divisor
40962306a36Sopenharmony_ci	add.l		%d3, %d6		# aligned with 3rd word of dividend
41062306a36Sopenharmony_ci	addx.l		%d2, %d5
41162306a36Sopenharmony_ci	mov.l		%d7, %d3
41262306a36Sopenharmony_ci	clr.w		%d3			# %d3 now ms word of divisor
41362306a36Sopenharmony_ci	swap		%d3			# aligned with 2nd word of dividend
41462306a36Sopenharmony_ci	add.l		%d3, %d5
41562306a36Sopenharmony_cildd2nd:
41662306a36Sopenharmony_ci	tst.b		DDSECOND(%a6)	# both q words done?
41762306a36Sopenharmony_ci	bne.b		lddremain
41862306a36Sopenharmony_ci# first quotient digit now correct. store digit and shift the
41962306a36Sopenharmony_ci# (subtracted) dividend
42062306a36Sopenharmony_ci	mov.w		%d1, DDQUOTIENT(%a6)
42162306a36Sopenharmony_ci	clr.l		%d1
# shift the 64-bit working dividend left by one word.
42262306a36Sopenharmony_ci	swap		%d5
42362306a36Sopenharmony_ci	swap		%d6
42462306a36Sopenharmony_ci	mov.w		%d6, %d5
42562306a36Sopenharmony_ci	clr.w		%d6
42662306a36Sopenharmony_ci	st		DDSECOND(%a6)		# second digit
42762306a36Sopenharmony_ci	bra.w		lddnormalized
42862306a36Sopenharmony_cilddremain:
42962306a36Sopenharmony_ci# add 2nd word to quotient, get the remainder.
43062306a36Sopenharmony_ci	mov.w		%d1, DDQUOTIENT+2(%a6)
43162306a36Sopenharmony_ci# shift down one word/digit to renormalize remainder.
43262306a36Sopenharmony_ci	mov.w		%d5, %d6
43362306a36Sopenharmony_ci	swap		%d6
43462306a36Sopenharmony_ci	swap		%d5
# undo the normalization: shift the remainder right once for every left
# shift counted in DDNORMAL (skipped entirely when the count is zero).
43562306a36Sopenharmony_ci	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
43662306a36Sopenharmony_ci	beq.b		lddrn
43762306a36Sopenharmony_ci	subq.l		&0x1, %d7		# set for loop count
43862306a36Sopenharmony_cilddnlp:
43962306a36Sopenharmony_ci	lsr.l		&0x1, %d5		# shift into %d6
44062306a36Sopenharmony_ci	roxr.l		&0x1, %d6
44162306a36Sopenharmony_ci	dbf		%d7, lddnlp
44262306a36Sopenharmony_cilddrn:
44362306a36Sopenharmony_ci	mov.l		%d6, %d5		# remainder
44462306a36Sopenharmony_ci	mov.l		DDQUOTIENT(%a6), %d6	# quotient
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	rts
# ldmm2: unsigned 32x32->64 multiply built from four 16x16->32 products.
#	in:  %d5, %d6 = the two 32-bit factors
#	out: %d5 = high 32 bits, %d6 = low 32 bits of the product
#	clobbers %d2, %d3, %d4 and the condition codes
44762306a36Sopenharmony_cildmm2:
44862306a36Sopenharmony_ci# factors for the 32X32->64 multiplication are in %d5 and %d6.
44962306a36Sopenharmony_ci# returns 64 bit result in %d5 (hi) %d6(lo).
45062306a36Sopenharmony_ci# destroys %d2,%d3,%d4.
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci# multiply hi,lo words of each factor to get 4 intermediate products
45362306a36Sopenharmony_ci	mov.l		%d6, %d2
45462306a36Sopenharmony_ci	mov.l		%d6, %d3
45562306a36Sopenharmony_ci	mov.l		%d5, %d4
45662306a36Sopenharmony_ci	swap		%d3
45762306a36Sopenharmony_ci	swap		%d4
45862306a36Sopenharmony_ci	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
45962306a36Sopenharmony_ci	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
46062306a36Sopenharmony_ci	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
46162306a36Sopenharmony_ci	mulu.w		%d4, %d3		# %d3 <- msw*msw
46262306a36Sopenharmony_ci# now use swap and addx to consolidate to two longwords
# the carries are added with longword addx: a word-sized addx would only
# update the low word of the msw*msw product and silently drop a carry
# out of bit 15 (e.g. 0xffffffff * 0x0001ffff). this matches the carry
# handling in mulu64_alg.
46362306a36Sopenharmony_ci	clr.l		%d4
46462306a36Sopenharmony_ci	swap		%d6
46562306a36Sopenharmony_ci	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
46662306a36Sopenharmony_ci	addx.l		%d4, %d3		# add any carry to m*m product
46762306a36Sopenharmony_ci	add.w		%d2, %d6		# add in lsw of other m*l product
46862306a36Sopenharmony_ci	addx.l		%d4, %d3		# add any carry to m*m product
46962306a36Sopenharmony_ci	swap		%d6			# %d6 is low 32 bits of final product
47062306a36Sopenharmony_ci	clr.w		%d5
47162306a36Sopenharmony_ci	clr.w		%d2			# lsw of two mixed products used,
47262306a36Sopenharmony_ci	swap		%d5			# now use msws of longwords
47362306a36Sopenharmony_ci	swap		%d2
47462306a36Sopenharmony_ci	add.l		%d2, %d5
47562306a36Sopenharmony_ci	add.l		%d3, %d5	# %d5 now ms 32 bits of final product
47662306a36Sopenharmony_ci	rts
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci#########################################################################
47962306a36Sopenharmony_ci# XDEF ****************************************************************	#
48062306a36Sopenharmony_ci#	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
48162306a36Sopenharmony_ci#	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
48262306a36Sopenharmony_ci#									#
48362306a36Sopenharmony_ci#	This is the library version which is accessed as a subroutine	#
48462306a36Sopenharmony_ci#	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
48562306a36Sopenharmony_ci#	64-bit multiply instruction.					#
48662306a36Sopenharmony_ci#									#
48762306a36Sopenharmony_ci# XREF ****************************************************************	#
48862306a36Sopenharmony_ci#	None								#
48962306a36Sopenharmony_ci#									#
49062306a36Sopenharmony_ci# INPUT ***************************************************************	#
49162306a36Sopenharmony_ci#	0x4(sp) = multiplier						#
49262306a36Sopenharmony_ci#	0x8(sp) = multiplicand						#
49362306a36Sopenharmony_ci#	0xc(sp) = pointer to location to place 64-bit result		#
49462306a36Sopenharmony_ci#									#
49562306a36Sopenharmony_ci# OUTPUT **************************************************************	#
49662306a36Sopenharmony_ci#	0xc(sp) = points to location of 64-bit result			#
49762306a36Sopenharmony_ci#									#
49862306a36Sopenharmony_ci# ALGORITHM ***********************************************************	#
49962306a36Sopenharmony_ci#	Perform the multiply in pieces using 16x16->32 unsigned		#
50062306a36Sopenharmony_ci# multiplies and "add" instructions.					#
50162306a36Sopenharmony_ci#	Set the condition codes as appropriate before performing an	#
50262306a36Sopenharmony_ci# "rts".								#
50362306a36Sopenharmony_ci#									#
50462306a36Sopenharmony_ci#########################################################################
50562306a36Sopenharmony_ci
# MUL64_CC: offset, relative to %a6, of the frame slot in which the
# multiply routine saves the incoming condition codes.
50662306a36Sopenharmony_ciset MUL64_CC, -4
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci	global		_060LSP__imulu64_
50962306a36Sopenharmony_ci_060LSP__imulu64_:
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
51262306a36Sopenharmony_ci	link.w		%a6,&-4
51362306a36Sopenharmony_ci	movm.l		&0x3800,-(%sp)		# save d2-d4
51462306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
51562306a36Sopenharmony_ci# PROLOGUE END ##########################################################
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	mov.l		0x8(%a6),%d0		# store multiplier in d0
52062306a36Sopenharmony_ci	beq.w		mulu64_zero		# handle zero separately
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	mov.l		0xc(%a6),%d1		# get multiplicand in d1
52362306a36Sopenharmony_ci	beq.w		mulu64_zero		# handle zero separately
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci#########################################################################
52662306a36Sopenharmony_ci#	63			   32				0	#
52762306a36Sopenharmony_ci#	----------------------------					#
52862306a36Sopenharmony_ci#	| hi(mplier) * hi(mplicand)|					#
52962306a36Sopenharmony_ci#	----------------------------					#
53062306a36Sopenharmony_ci#		     -----------------------------			#
53162306a36Sopenharmony_ci#		     | hi(mplier) * lo(mplicand) |			#
53262306a36Sopenharmony_ci#		     -----------------------------			#
53362306a36Sopenharmony_ci#		     -----------------------------			#
53462306a36Sopenharmony_ci#		     | lo(mplier) * hi(mplicand) |			#
53562306a36Sopenharmony_ci#		     -----------------------------			#
53662306a36Sopenharmony_ci#	  |			   -----------------------------	#
53762306a36Sopenharmony_ci#	--|--			   | lo(mplier) * lo(mplicand) |	#
53862306a36Sopenharmony_ci#	  |			   -----------------------------	#
53962306a36Sopenharmony_ci#	========================================================	#
54062306a36Sopenharmony_ci#	--------------------------------------------------------	#
54162306a36Sopenharmony_ci#	|	hi(result)	   |	    lo(result)         |	#
54262306a36Sopenharmony_ci#	--------------------------------------------------------	#
54362306a36Sopenharmony_ci#########################################################################
54462306a36Sopenharmony_cimulu64_alg:
54562306a36Sopenharmony_ci# load temp registers with operands
54662306a36Sopenharmony_ci	mov.l		%d0,%d2			# mr in d2
54762306a36Sopenharmony_ci	mov.l		%d0,%d3			# mr in d3
54862306a36Sopenharmony_ci	mov.l		%d1,%d4			# md in d4
54962306a36Sopenharmony_ci	swap		%d3			# hi(mr) in lo d3
55062306a36Sopenharmony_ci	swap		%d4			# hi(md) in lo d4
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci# complete necessary multiplies:
55362306a36Sopenharmony_ci	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
55462306a36Sopenharmony_ci	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
55562306a36Sopenharmony_ci	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
55662306a36Sopenharmony_ci	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci# add lo portions of [2],[3] to hi portion of [1].
55962306a36Sopenharmony_ci# add carries produced from these adds to [4].
56062306a36Sopenharmony_ci# lo([1]) is the final lo 16 bits of the result.
56162306a36Sopenharmony_ci	clr.l		%d4			# load d4 w/ zero value
56262306a36Sopenharmony_ci	swap		%d0			# hi([1]) <==> lo([1])
56362306a36Sopenharmony_ci	add.w		%d1,%d0			# hi([1]) + lo([2])
56462306a36Sopenharmony_ci	addx.l		%d4,%d3			#    [4]  + carry
56562306a36Sopenharmony_ci	add.w		%d2,%d0			# hi([1]) + lo([3])
56662306a36Sopenharmony_ci	addx.l		%d4,%d3			#    [4]  + carry
56762306a36Sopenharmony_ci	swap		%d0			# lo([1]) <==> hi([1])
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci# lo portions of [2],[3] have been added in to final result.
57062306a36Sopenharmony_ci# now, clear lo, put hi in lo reg, and add to [4]
57162306a36Sopenharmony_ci	clr.w		%d1			# clear lo([2])
57262306a36Sopenharmony_ci	clr.w		%d2			# clear hi([3])
57362306a36Sopenharmony_ci	swap		%d1			# hi([2]) in lo d1
57462306a36Sopenharmony_ci	swap		%d2			# hi([3]) in lo d2
57562306a36Sopenharmony_ci	add.l		%d2,%d1			#    [4]  + hi([2])
57662306a36Sopenharmony_ci	add.l		%d3,%d1			#    [4]  + hi([3])
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci# now, grab the condition codes. only one that can be set is 'N'.
57962306a36Sopenharmony_ci# 'N' CAN be set if the operation is unsigned if bit 63 is set.
58062306a36Sopenharmony_ci	mov.w		MUL64_CC(%a6),%d4
58162306a36Sopenharmony_ci	andi.b		&0x10,%d4		# keep old 'X' bit
58262306a36Sopenharmony_ci	tst.l		%d1			# may set 'N' bit
58362306a36Sopenharmony_ci	bpl.b		mulu64_ddone
58462306a36Sopenharmony_ci	ori.b		&0x8,%d4		# set 'N' bit
58562306a36Sopenharmony_cimulu64_ddone:
58662306a36Sopenharmony_ci	mov.w		%d4,%cc
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci# here, the result is in d1 and d0. the current strategy is to save
58962306a36Sopenharmony_ci# the values at the location pointed to by a0.
59062306a36Sopenharmony_ci# use movm here to not disturb the condition codes.
59162306a36Sopenharmony_cimulu64_end:
59262306a36Sopenharmony_ci	exg		%d1,%d0
59362306a36Sopenharmony_ci	movm.l		&0x0003,([0x10,%a6])		# save result
59462306a36Sopenharmony_ci
59562306a36Sopenharmony_ci# EPILOGUE BEGIN ########################################################
59662306a36Sopenharmony_ci#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
59762306a36Sopenharmony_ci	movm.l		(%sp)+,&0x001c		# restore d2-d4
59862306a36Sopenharmony_ci	unlk		%a6
59962306a36Sopenharmony_ci# EPILOGUE END ##########################################################
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	rts
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci# one or both of the operands is zero so the result is also zero.
60462306a36Sopenharmony_ci# save the zero result to the register file and set the 'Z' ccode bit.
60562306a36Sopenharmony_cimulu64_zero:
60662306a36Sopenharmony_ci	clr.l		%d0
60762306a36Sopenharmony_ci	clr.l		%d1
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	mov.w		MUL64_CC(%a6),%d4
61062306a36Sopenharmony_ci	andi.b		&0x10,%d4
61162306a36Sopenharmony_ci	ori.b		&0x4,%d4
61262306a36Sopenharmony_ci	mov.w		%d4,%cc			# set 'Z' ccode bit
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci	bra.b		mulu64_end
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_ci##########
61762306a36Sopenharmony_ci# muls.l #
61862306a36Sopenharmony_ci##########
61962306a36Sopenharmony_ci	global		_060LSP__imuls64_
62062306a36Sopenharmony_ci_060LSP__imuls64_:
62162306a36Sopenharmony_ci# signed 32x32->64 multiply of 0x8(%a6) by 0xc(%a6); result stored via the pointer at 0x10(%a6).
62262306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
62362306a36Sopenharmony_ci	link.w		%a6,&-4			# frame w/ 4 bytes of locals
62462306a36Sopenharmony_ci	movm.l		&0x3c00,-(%sp)		# save d2-d5
62562306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
62662306a36Sopenharmony_ci# PROLOGUE END ##########################################################
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes
62962306a36Sopenharmony_ci# zero operands leave through muls64_zero so the d2-d5 restore matches the save above.
63062306a36Sopenharmony_ci	mov.l		0x8(%a6),%d0		# store multiplier in d0
63162306a36Sopenharmony_ci	beq.w		muls64_zero		# handle zero separately
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	mov.l		0xc(%a6),%d1		# get multiplicand in d1
63462306a36Sopenharmony_ci	beq.w		muls64_zero		# handle zero separately
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	clr.b		%d5			# clear sign tag (lo byte of d5)
63762306a36Sopenharmony_ci	tst.l		%d0			# is multiplier negative?
63862306a36Sopenharmony_ci	bge.b		muls64_chk_md_sgn	# no
63962306a36Sopenharmony_ci	neg.l		%d0			# make multiplier positive
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	ori.b		&0x1,%d5		# save multiplier sgn
64262306a36Sopenharmony_ci
64362306a36Sopenharmony_ci# the result sign is the exclusive or of the operand sign bits.
64462306a36Sopenharmony_cimuls64_chk_md_sgn:
64562306a36Sopenharmony_ci	tst.l		%d1			# is multiplicand negative?
64662306a36Sopenharmony_ci	bge.b		muls64_alg		# no
64762306a36Sopenharmony_ci	neg.l		%d1			# make multiplicand positive
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci	eori.b		&0x1,%d5		# calculate correct sign
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci#########################################################################
65262306a36Sopenharmony_ci#	63			   32				0	#
65362306a36Sopenharmony_ci#	----------------------------					#
65462306a36Sopenharmony_ci#	| hi(mplier) * hi(mplicand)|					#
65562306a36Sopenharmony_ci#	----------------------------					#
65662306a36Sopenharmony_ci#		     -----------------------------			#
65762306a36Sopenharmony_ci#		     | hi(mplier) * lo(mplicand) |			#
65862306a36Sopenharmony_ci#		     -----------------------------			#
65962306a36Sopenharmony_ci#		     -----------------------------			#
66062306a36Sopenharmony_ci#		     | lo(mplier) * hi(mplicand) |			#
66162306a36Sopenharmony_ci#		     -----------------------------			#
66262306a36Sopenharmony_ci#	  |			   -----------------------------	#
66362306a36Sopenharmony_ci#	--|--			   | lo(mplier) * lo(mplicand) |	#
66462306a36Sopenharmony_ci#	  |			   -----------------------------	#
66562306a36Sopenharmony_ci#	========================================================	#
66662306a36Sopenharmony_ci#	--------------------------------------------------------	#
66762306a36Sopenharmony_ci#	|	hi(result)	   |	    lo(result)         |	#
66862306a36Sopenharmony_ci#	--------------------------------------------------------	#
66962306a36Sopenharmony_ci#########################################################################
67062306a36Sopenharmony_cimuls64_alg:
67162306a36Sopenharmony_ci# load temp registers with operands (both are non-negative magnitudes here)
67262306a36Sopenharmony_ci	mov.l		%d0,%d2			# mr in d2
67362306a36Sopenharmony_ci	mov.l		%d0,%d3			# mr in d3
67462306a36Sopenharmony_ci	mov.l		%d1,%d4			# md in d4
67562306a36Sopenharmony_ci	swap		%d3			# hi(mr) in lo d3
67662306a36Sopenharmony_ci	swap		%d4			# hi(md) in lo d4
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci# complete necessary multiplies:
67962306a36Sopenharmony_ci	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
68062306a36Sopenharmony_ci	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
68162306a36Sopenharmony_ci	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
68262306a36Sopenharmony_ci	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci# add lo portions of [2],[3] to hi portion of [1].
68562306a36Sopenharmony_ci# add carries produced from these adds to [4].
68662306a36Sopenharmony_ci# lo([1]) is the final lo 16 bits of the result.
68762306a36Sopenharmony_ci	clr.l		%d4			# load d4 w/ zero value
68862306a36Sopenharmony_ci	swap		%d0			# hi([1]) <==> lo([1])
68962306a36Sopenharmony_ci	add.w		%d1,%d0			# hi([1]) + lo([2])
69062306a36Sopenharmony_ci	addx.l		%d4,%d3			#    [4]  + carry
69162306a36Sopenharmony_ci	add.w		%d2,%d0			# hi([1]) + lo([3])
69262306a36Sopenharmony_ci	addx.l		%d4,%d3			#    [4]  + carry
69362306a36Sopenharmony_ci	swap		%d0			# lo([1]) <==> hi([1])
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci# lo portions of [2],[3] have been added in to final result.
69662306a36Sopenharmony_ci# now, clear lo, put hi in lo reg, and add to [4]
69762306a36Sopenharmony_ci	clr.w		%d1			# clear lo([2])
69862306a36Sopenharmony_ci	clr.w		%d2			# clear lo([3])
69962306a36Sopenharmony_ci	swap		%d1			# hi([2]) in lo d1
70062306a36Sopenharmony_ci	swap		%d2			# hi([3]) in lo d2
70162306a36Sopenharmony_ci	add.l		%d2,%d1			# hi([2]) + hi([3])
70262306a36Sopenharmony_ci	add.l		%d3,%d1			#   ...   + [4] = hi(result)
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	tst.b		%d5			# should result be signed?
70562306a36Sopenharmony_ci	beq.b		muls64_done		# no
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci# result should be a signed negative number.
70862306a36Sopenharmony_ci# compute 2's complement of the unsigned number:
70962306a36Sopenharmony_ci#   -invert all bits and add 1
71062306a36Sopenharmony_cimuls64_neg:
71162306a36Sopenharmony_ci	not.l		%d0			# invert lo(result) bits
71262306a36Sopenharmony_ci	not.l		%d1			# invert hi(result) bits
71362306a36Sopenharmony_ci	addq.l		&1,%d0			# add 1 to lo(result)
71462306a36Sopenharmony_ci	addx.l		%d4,%d1			# add carry to hi(result); d4 is still zero
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_cimuls64_done:
71762306a36Sopenharmony_ci	mov.w		MUL64_CC(%a6),%d4	# fetch incoming ccodes
71862306a36Sopenharmony_ci	andi.b		&0x10,%d4		# keep old 'X' bit
71962306a36Sopenharmony_ci	tst.l		%d1			# may set 'N' bit
72062306a36Sopenharmony_ci	bpl.b		muls64_ddone
72162306a36Sopenharmony_ci	ori.b		&0x8,%d4		# set 'N' bit
72262306a36Sopenharmony_cimuls64_ddone:
72362306a36Sopenharmony_ci	mov.w		%d4,%cc			# install outgoing ccodes
72462306a36Sopenharmony_ci
72562306a36Sopenharmony_ci# here, the result is in d1 and d0. it is stored through the result
72662306a36Sopenharmony_ci# pointer that the caller passed at 0x10(%a6).
72762306a36Sopenharmony_ci# use movm here to not disturb the condition codes.
72862306a36Sopenharmony_cimuls64_end:
72962306a36Sopenharmony_ci	exg		%d1,%d0			# d0 = hi(result), d1 = lo(result)
73062306a36Sopenharmony_ci	movm.l		&0x0003,([0x10,%a6])	# store d0,d1 via result pointer
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci# EPILOGUE BEGIN ########################################################
73362306a36Sopenharmony_ci#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
73462306a36Sopenharmony_ci	movm.l		(%sp)+,&0x003c		# restore d2-d5
73562306a36Sopenharmony_ci	unlk		%a6
73662306a36Sopenharmony_ci# EPILOGUE END ##########################################################
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	rts
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci# one or both of the operands is zero so the result is also zero.
74162306a36Sopenharmony_ci# produce a zero result, keep the old 'X' bit and set the 'Z' ccode bit.
74262306a36Sopenharmony_cimuls64_zero:
74362306a36Sopenharmony_ci	clr.l		%d0			# lo(result) = 0
74462306a36Sopenharmony_ci	clr.l		%d1			# hi(result) = 0
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	mov.w		MUL64_CC(%a6),%d4	# fetch incoming ccodes
74762306a36Sopenharmony_ci	andi.b		&0x10,%d4		# keep old 'X' bit
74862306a36Sopenharmony_ci	ori.b		&0x4,%d4
74962306a36Sopenharmony_ci	mov.w		%d4,%cc			# set 'Z' ccode bit
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci	bra.b		muls64_end		# store result, restore d2-d5, return
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci#########################################################################
75462306a36Sopenharmony_ci# XDEF ****************************************************************	#
75562306a36Sopenharmony_ci#	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
75662306a36Sopenharmony_ci#	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
75762306a36Sopenharmony_ci#	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
75862306a36Sopenharmony_ci#	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
75962306a36Sopenharmony_ci#	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
76062306a36Sopenharmony_ci#	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
76162306a36Sopenharmony_ci#									#
76262306a36Sopenharmony_ci#	This is the library version which is accessed as a subroutine	#
76362306a36Sopenharmony_ci#	and therefore does not work exactly like the 680X0 "cmp2"	#
76462306a36Sopenharmony_ci#	instruction.							#
76562306a36Sopenharmony_ci#									#
76662306a36Sopenharmony_ci# XREF ****************************************************************	#
76762306a36Sopenharmony_ci#	None								#
76862306a36Sopenharmony_ci#									#
76962306a36Sopenharmony_ci# INPUT ***************************************************************	#
77062306a36Sopenharmony_ci#	0x4(sp) = Rn							#
77162306a36Sopenharmony_ci#	0x8(sp) = pointer to boundary pair				#
77262306a36Sopenharmony_ci#									#
77362306a36Sopenharmony_ci# OUTPUT **************************************************************	#
77462306a36Sopenharmony_ci#	cc = condition codes are set correctly				#
77562306a36Sopenharmony_ci#									#
77662306a36Sopenharmony_ci# ALGORITHM ***********************************************************	#
77762306a36Sopenharmony_ci#	In the interest of simplicity, all operands are converted to	#
77862306a36Sopenharmony_ci# longword size whether the operation is byte, word, or long. The	#
77962306a36Sopenharmony_ci# bounds are sign extended accordingly. If Rn is a data register, Rn is #
78062306a36Sopenharmony_ci# also sign extended. If Rn is an address register, it need not be sign #
78162306a36Sopenharmony_ci# extended since the full register is always used.			#
78262306a36Sopenharmony_ci#	The condition codes are set correctly before the final "rts".	#
78362306a36Sopenharmony_ci#									#
78462306a36Sopenharmony_ci#########################################################################
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ciset	CMP2_CC,	-4		# a6-relative offset of the slot where the cmp2 entries save incoming ccodes
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	global		_060LSP__cmp2_Ab_
78962306a36Sopenharmony_ci_060LSP__cmp2_Ab_:
79062306a36Sopenharmony_ci# "cmp2.b An,<ea>" entry: byte bounds are widened to longs; the register value is used as passed.
79162306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
79262306a36Sopenharmony_ci	link.w		%a6,&-4			# frame w/ 4-byte local (CMP2_CC slot)
79362306a36Sopenharmony_ci	movm.l		&0x3800,-(%sp)		# save d2-d4
79462306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
79562306a36Sopenharmony_ci# PROLOGUE END ##########################################################
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes before any mov alters them
79862306a36Sopenharmony_ci	mov.l		0x8(%a6), %d2		# get regval (full longword, not extended)
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci	mov.b		([0xc,%a6],0x0),%d0	# lo bnd = byte 0 of boundary pair
80162306a36Sopenharmony_ci	mov.b		([0xc,%a6],0x1),%d1	# hi bnd = byte 1 of boundary pair
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	extb.l		%d0			# sign extend lo bnd
80462306a36Sopenharmony_ci	extb.l		%d1			# sign extend hi bnd
80562306a36Sopenharmony_ci	bra.w		l_cmp2_cmp		# go do the compare emulation
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	global		_060LSP__cmp2_Aw_
80862306a36Sopenharmony_ci_060LSP__cmp2_Aw_:
80962306a36Sopenharmony_ci# "cmp2.w An,<ea>" entry: word bounds are widened to longs; the register value is used as passed.
81062306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
81162306a36Sopenharmony_ci	link.w		%a6,&-4			# frame w/ 4-byte local (CMP2_CC slot)
81262306a36Sopenharmony_ci	movm.l		&0x3800,-(%sp)		# save d2-d4
81362306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
81462306a36Sopenharmony_ci# PROLOGUE END ##########################################################
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes before any mov alters them
81762306a36Sopenharmony_ci	mov.l		0x8(%a6), %d2		# get regval (full longword, not extended)
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	mov.w		([0xc,%a6],0x0),%d0	# lo bnd = word at offset 0 of boundary pair
82062306a36Sopenharmony_ci	mov.w		([0xc,%a6],0x2),%d1	# hi bnd = word at offset 2 of boundary pair
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	ext.l		%d0			# sign extend lo bnd
82362306a36Sopenharmony_ci	ext.l		%d1			# sign extend hi bnd
82462306a36Sopenharmony_ci	bra.w		l_cmp2_cmp		# go do the compare emulation
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	global		_060LSP__cmp2_Al_
82762306a36Sopenharmony_ci_060LSP__cmp2_Al_:
82862306a36Sopenharmony_ci# "cmp2.l An,<ea>" entry: bounds and register value are already longs, so nothing is extended.
82962306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
83062306a36Sopenharmony_ci	link.w		%a6,&-4			# frame w/ 4-byte local (CMP2_CC slot)
83162306a36Sopenharmony_ci	movm.l		&0x3800,-(%sp)		# save d2-d4
83262306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
83362306a36Sopenharmony_ci# PROLOGUE END ##########################################################
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes before any mov alters them
83662306a36Sopenharmony_ci	mov.l		0x8(%a6), %d2		# get regval
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	mov.l		([0xc,%a6],0x0),%d0	# lo bnd = long at offset 0 of boundary pair
83962306a36Sopenharmony_ci	mov.l		([0xc,%a6],0x4),%d1	# hi bnd = long at offset 4 of boundary pair
84062306a36Sopenharmony_ci	bra.w		l_cmp2_cmp		# go do the compare emulation
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci	global		_060LSP__cmp2_Db_
84362306a36Sopenharmony_ci_060LSP__cmp2_Db_:
84462306a36Sopenharmony_ci# "cmp2.b Dn,<ea>" entry: byte bounds and the low byte of the register value are all widened to longs.
84562306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
84662306a36Sopenharmony_ci	link.w		%a6,&-4			# frame w/ 4-byte local (CMP2_CC slot)
84762306a36Sopenharmony_ci	movm.l		&0x3800,-(%sp)		# save d2-d4
84862306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
84962306a36Sopenharmony_ci# PROLOGUE END ##########################################################
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes before any mov alters them
85262306a36Sopenharmony_ci	mov.l		0x8(%a6), %d2		# get regval
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_ci	mov.b		([0xc,%a6],0x0),%d0	# lo bnd = byte 0 of boundary pair
85562306a36Sopenharmony_ci	mov.b		([0xc,%a6],0x1),%d1	# hi bnd = byte 1 of boundary pair
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	extb.l		%d0			# sign extend lo bnd
85862306a36Sopenharmony_ci	extb.l		%d1			# sign extend hi bnd
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci# operation is a data register compare.
86162306a36Sopenharmony_ci# sign extend byte to long so we can do simple longword compares.
86262306a36Sopenharmony_ci	extb.l		%d2			# sign extend data byte
86362306a36Sopenharmony_ci	bra.w		l_cmp2_cmp		# go do the compare emulation
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	global		_060LSP__cmp2_Dw_
86662306a36Sopenharmony_ci_060LSP__cmp2_Dw_:
86762306a36Sopenharmony_ci# "cmp2.w Dn,<ea>" entry: word bounds and the low word of the register value are all widened to longs.
86862306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
86962306a36Sopenharmony_ci	link.w		%a6,&-4			# frame w/ 4-byte local (CMP2_CC slot)
87062306a36Sopenharmony_ci	movm.l		&0x3800,-(%sp)		# save d2-d4
87162306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
87262306a36Sopenharmony_ci# PROLOGUE END ##########################################################
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes before any mov alters them
87562306a36Sopenharmony_ci	mov.l		0x8(%a6), %d2		# get regval
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci	mov.w		([0xc,%a6],0x0),%d0	# lo bnd = word at offset 0 of boundary pair
87862306a36Sopenharmony_ci	mov.w		([0xc,%a6],0x2),%d1	# hi bnd = word at offset 2 of boundary pair
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	ext.l		%d0			# sign extend lo bnd
88162306a36Sopenharmony_ci	ext.l		%d1			# sign extend hi bnd
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci# operation is a data register compare.
88462306a36Sopenharmony_ci# sign extend word to long so we can do simple longword compares.
88562306a36Sopenharmony_ci	ext.l		%d2			# sign extend data word
88662306a36Sopenharmony_ci	bra.w		l_cmp2_cmp		# go emulate compare
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	global		_060LSP__cmp2_Dl_
88962306a36Sopenharmony_ci_060LSP__cmp2_Dl_:
89062306a36Sopenharmony_ci# "cmp2.l Dn,<ea>" entry: everything is already a long; falls through into l_cmp2_cmp below.
89162306a36Sopenharmony_ci# PROLOGUE BEGIN ########################################################
89262306a36Sopenharmony_ci	link.w		%a6,&-4			# frame w/ 4-byte local (CMP2_CC slot)
89362306a36Sopenharmony_ci	movm.l		&0x3800,-(%sp)		# save d2-d4
89462306a36Sopenharmony_ci#	fmovm.l		&0x0,-(%sp)		# save no fpregs
89562306a36Sopenharmony_ci# PROLOGUE END ##########################################################
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes before any mov alters them
89862306a36Sopenharmony_ci	mov.l		0x8(%a6), %d2		# get regval
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci	mov.l		([0xc,%a6],0x0),%d0	# lo bnd = long at offset 0 of boundary pair
90162306a36Sopenharmony_ci	mov.l		([0xc,%a6],0x4),%d1	# hi bnd = long at offset 4 of boundary pair
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci# shared tail for every cmp2 entry: expects d0 = lo bnd, d1 = hi bnd, d2 = Rn (all longs).
90462306a36Sopenharmony_ci# To set the ccodes correctly:
90562306a36Sopenharmony_ci#	(1) save 'Z' bit from (Rn - lo)
90662306a36Sopenharmony_ci#	(2) save 'Z' and 'C' bits from the compare of (hi - lo) with (Rn - lo)
90762306a36Sopenharmony_ci#	(3) keep 'X', 'N', and 'V' from before instruction
90862306a36Sopenharmony_ci#	(4) combine ccodes
90962306a36Sopenharmony_ci#
91062306a36Sopenharmony_cil_cmp2_cmp:
91162306a36Sopenharmony_ci	sub.l		%d0, %d2		# (Rn - lo)
91262306a36Sopenharmony_ci	mov.w		%cc, %d3		# fetch resulting ccodes
91362306a36Sopenharmony_ci	andi.b		&0x4, %d3		# keep 'Z' bit
91462306a36Sopenharmony_ci	sub.l		%d0, %d1		# (hi - lo)
91562306a36Sopenharmony_ci	cmp.l		%d1,%d2			# compare (hi - lo) with (Rn - lo)
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	mov.w		%cc, %d4		# fetch resulting ccodes
91862306a36Sopenharmony_ci	or.b		%d4, %d3		# combine w/ earlier ccodes
91962306a36Sopenharmony_ci	andi.b		&0x5, %d3		# keep 'Z' and 'C'
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
92262306a36Sopenharmony_ci	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
92362306a36Sopenharmony_ci	or.b		%d3, %d4		# insert new 'Z','C' bits
92462306a36Sopenharmony_ci	mov.w		%d4,%cc			# save new ccodes
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci# EPILOGUE BEGIN ########################################################
92762306a36Sopenharmony_ci#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
92862306a36Sopenharmony_ci	movm.l		(%sp)+,&0x001c		# restore d2-d4
92962306a36Sopenharmony_ci	unlk		%a6
93062306a36Sopenharmony_ci# EPILOGUE END ##########################################################
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_ci	rts
933