18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  Copyright (C) 2003-2013 Altera Corporation
48c2ecf20Sopenharmony_ci *  All rights reserved.
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/linkage.h>
98c2ecf20Sopenharmony_ci#include <asm/entry.h>
108c2ecf20Sopenharmony_ci
.set noat
.set nobreak

/*
 * ".set noat" explicitly allows the use of r1 (the assembler temporary
 * register) within this code.  This register is normally reserved for the
 * use of the compiler/assembler.
 *
 * ".set nobreak" likewise allows bt (r25) and ba (r30) to be named without
 * an assembler warning; they are saved/restored below so that the emulation
 * frame mirrors the complete register file.
 */

/*
 * Layout of the emulation frame built on the stack by instruction_trap:
 *
 *   4*N(sp)              value of integer register rN, for N = 0..31
 *   INSNEMU_ESTATUS(sp)  saved estatus
 *
 * estatus lives outside the 0..124 range on purpose: the A/B/C register
 * fields of the emulated instruction are turned directly into offsets
 * 4*N from sp, so nothing that must survive untouched may share a slot
 * that a register field can address.
 */
#define INSNEMU_ESTATUS		128
#define INSNEMU_FRAME_SIZE	132

/*
 * instruction_trap - emulate mul/muli/mulx../div/divu in software.
 *
 * Entry: sp points at a register save area addressed with the PT_* offsets
 *        from <asm/entry.h>; the values loaded below are taken from it.
 *        (NOTE(review): which exception vectors branch here is decided
 *        outside this file.)
 * Exit:  eret to the address in ea, with every integer register restored
 *        to its pre-exception value except the destination register of the
 *        emulated instruction, which receives the computed result.
 */
ENTRY(instruction_trap)
	/* Reload the interrupted context from the PT_* save area. */
	ldw	r1, PT_R1(sp)
	ldw	r2, PT_R2(sp)
	ldw	r3, PT_R3(sp)
	ldw	r4, PT_R4(sp)
	ldw	r5, PT_R5(sp)
	ldw	r6, PT_R6(sp)
	ldw	r7, PT_R7(sp)
	ldw	r8, PT_R8(sp)
	ldw	r9, PT_R9(sp)
	ldw	r10, PT_R10(sp)
	ldw	r11, PT_R11(sp)
	ldw	r12, PT_R12(sp)
	ldw	r13, PT_R13(sp)
	ldw	r14, PT_R14(sp)
	ldw	r15, PT_R15(sp)
	ldw	ra, PT_RA(sp)
	ldw	fp, PT_FP(sp)
	ldw	gp, PT_GP(sp)
	ldw	et, PT_ESTATUS(sp)
	wrctl	estatus, et
	ldw	ea, PT_EA(sp)
	ldw	et, PT_SP(sp)		/* backup sp in et */

	addi	sp, sp, PT_REGS_SIZE	/* drop the PT_* save area */

	/* INSTRUCTION EMULATION
	*  ---------------------
	*
	* Nios II processors generate exceptions for unimplemented instructions.
	* The routines below emulate these instructions.  Depending on the
	* processor core, the only instructions that might need to be emulated
	* are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
	*
	* The emulations match the instructions, except for the following
	* limitations:
	*
	* 1) The emulation routines do not emulate the use of the exception
	*    temporary register (et) as a source operand because the exception
	*    handler already has modified it.  Its frame slot is filled with 0
	*    so that such a read is at least deterministic.
	*
	* 2) The routines do not emulate the use of the stack pointer (sp) or
	*    the exception return address register (ea) as a destination because
	*    modifying these registers crashes the exception handler or the
	*    interrupted routine.
	*
	* Detailed Design
	* ---------------
	*
	* The emulation routines expect the contents of integer registers r0-r31
	* to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp).  The
	* routines retrieve source operands from the stack and modify the
	* destination register's value on the stack prior to the end of the
	* exception handler.  Then all registers except the destination register
	* are restored to their previous values.
	*
	* The saved estatus is kept at INSNEMU_ESTATUS(sp), beyond the last
	* register slot, so that no register field of the emulated instruction
	* can select it as a destination.
	*
	* The instruction that causes the exception is found at address -4(ea).
	* The instruction's OP and OPX fields identify the operation to be
	* performed.
	*
	* One instruction, muli, is an I-type instruction that is identified by
	* an OP field of 0x24.
	*
	* muli   AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
	*           27    22                6      0    <-- LSB of field
	*
	* The remaining emulated instructions are R-type and have an OP field
	* of 0x3a.  Their OPX fields identify them.
	*
	* R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
	*           27    22    17     11     6      0  <-- LSB of field
	*
	*
	* Opcode Encoding.  muli is identified by its OP value.  Then OPX & 0x02
	* is used to differentiate between the division opcodes and the
	* remaining multiplication opcodes.
	*
	* Instruction   OP      OPX    OPX & 0x02
	* -----------   ----    ----   ----------
	* muli          0x24
	* divu          0x3a    0x24         0
	* div           0x3a    0x25         0
	* mul           0x3a    0x27      != 0
	* mulxuu        0x3a    0x07      != 0
	* mulxsu        0x3a    0x17      != 0
	* mulxss        0x3a    0x1f      != 0
	*/


	/*
	* Save everything on the stack to make it easy for the emulation
	* routines to retrieve the source register operands.  Each register
	* rN goes to offset 4*N, matching the 4*A / 4*B / 4*C offsets that
	* are extracted from the instruction word below.
	*/

	addi sp, sp, -INSNEMU_FRAME_SIZE
	stw zero, 0(sp)	/* Save zero on stack to avoid special case for r0. */
	stw r1, 4(sp)
	stw r2,  8(sp)
	stw r3, 12(sp)
	stw r4, 16(sp)
	stw r5, 20(sp)
	stw r6, 24(sp)
	stw r7, 28(sp)
	stw r8, 32(sp)
	stw r9, 36(sp)
	stw r10, 40(sp)
	stw r11, 44(sp)
	stw r12, 48(sp)
	stw r13, 52(sp)
	stw r14, 56(sp)
	stw r15, 60(sp)
	stw r16, 64(sp)
	stw r17, 68(sp)
	stw r18, 72(sp)
	stw r19, 76(sp)
	stw r20, 80(sp)
	stw r21, 84(sp)
	stw r22, 88(sp)
	stw r23, 92(sp)
	/*
	* r24 (et): its pre-exception value is already gone (et currently
	* carries the previous sp).  Store 0 so the slot never exposes stale
	* stack contents when et is named as a source operand.
	*/
	stw zero, 96(sp)
	stw bt, 100(sp)		/* r25 */
	stw gp, 104(sp)		/* r26 */
	stw et, 108(sp)		/* r27: et contains previous sp value. */
	stw fp, 112(sp)		/* r28 */
	stw ea, 116(sp)		/* r29 */
	stw ba, 120(sp)		/* r30 */
	stw ra, 124(sp)		/* r31 */

	/* r5 is already saved above, so it is free to use as scratch here. */
	rdctl r5, estatus
	stw r5, INSNEMU_ESTATUS(sp)


	/*
	* Split the instruction into its fields.  We need 4*A, 4*B, and 4*C as
	* offsets to the stack pointer for access to the stored register values.
	*/
	ldw r2,-4(ea)	/* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
	roli r3, r2, 7	/* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
	roli r4, r3, 3	/* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
	roli r5, r4, 2	/* r5 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */
	srai r4, r4, 16	/* r4 = (sign-extended) IMM16 */
	roli r6, r5, 5	/* r6 = XXXX,NNNNN,PPPPPP,AAAAA,BBBBB,CCCCC,XX */
	andi r2, r2, 0x3f	/* r2 = 00000000000000000000000000,PPPPPP */
	andi r3, r3, 0x7c	/* r3 = 0000000000000000000000000,AAAAA,00 */
	andi r5, r5, 0x7c	/* r5 = 0000000000000000000000000,BBBBB,00 */
	andi r6, r6, 0x7c	/* r6 = 0000000000000000000000000,CCCCC,00 */

	/* Now
	* r2 = OP
	* r3 = 4*A
	* r4 = IMM16 (sign extended)
	* r5 = 4*B
	* r6 = 4*C
	*/

	/*
	* Get the operands.
	*
	* It is necessary to check for muli because it uses an I-type
	* instruction format, while the other instructions have an R-type
	* format.
	*
	*  Prepare for either multiplication or division loop.
	*  They both loop 32 times.
	*/
	movi r14, 32

	add  r3, r3, sp		/* r3 = address of A-operand. */
	ldw  r3, 0(r3)		/* r3 = A-operand. */
	movi r7, 0x24		/* muli opcode (I-type instruction format) */
	beq r2, r7, mul_immed /* muli doesn't use the B register as a source */

	add  r5, r5, sp		/* r5 = address of B-operand. */
	ldw  r5, 0(r5)		/* r5 = B-operand. */
				/* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
				/* IMM16 not needed, align OPX portion */
				/* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
	srli r4, r4, 5		/* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
	andi r4, r4, 0x3f	/* r4 = 00000000000000000000000000,-OPX-- */

	/* Now
	* r2 = OP
	* r3 = src1
	* r5 = src2
	* r4 = OPX (no longer can be muli)
	* r6 = 4*C
	*/


	/*
	*  Multiply or Divide?
	*/
	andi r7, r4, 0x02	/* For R-type multiply instructions,
				   OPX & 0x02 != 0 */
	bne r7, zero, multiply


	/* DIVISION
	*
	* Divide an unsigned dividend by an unsigned divisor using
	* a shift-and-subtract algorithm.  The example below shows
	* 43 div 7 = 6 for 8-bit integers.  This classic algorithm uses a
	* single register to store both the dividend and the quotient,
	* allowing both values to be shifted with a single instruction.
	*
	*                               remainder dividend:quotient
	*                               --------- -----------------
	*   initialize                   00000000     00101011:
	*   shift                        00000000     0101011:_
	*   remainder >= divisor? no     00000000     0101011:0
	*   shift                        00000000     101011:0_
	*   remainder >= divisor? no     00000000     101011:00
	*   shift                        00000001     01011:00_
	*   remainder >= divisor? no     00000001     01011:000
	*   shift                        00000010     1011:000_
	*   remainder >= divisor? no     00000010     1011:0000
	*   shift                        00000101     011:0000_
	*   remainder >= divisor? no     00000101     011:00000
	*   shift                        00001010     11:00000_
	*   remainder >= divisor? yes    00001010     11:000001
	*       remainder -= divisor   - 00000111
	*                              ----------
	*                                00000011     11:000001
	*   shift                        00000111     1:000001_
	*   remainder >= divisor? yes    00000111     1:0000011
	*       remainder -= divisor   - 00000111
	*                              ----------
	*                                00000000     1:0000011
	*   shift                        00000001     :0000011_
	*   remainder >= divisor? no     00000001     :00000110
	*
	* The quotient is 00000110.
	*/

divide:
	/*
	*  Prepare for division by assuming the result
	*  is unsigned, and storing its "sign" as 0.
	*/
	movi r17, 0


	/* Which division opcode? */
	xori r7, r4, 0x25		/* OPX of div */
	bne r7, zero, unsigned_division


	/*
	*  OPX is div.  Determine and store the sign of the quotient.
	*  Then take the absolute value of both operands.
	*/
	xor r17, r3, r5		/* MSB contains sign of quotient */
	bge r3,zero,dividend_is_nonnegative
	sub r3, zero, r3	/* -r3 */
dividend_is_nonnegative:
	bge r5, zero, divisor_is_nonnegative
	sub r5, zero, r5	/* -r5 */
divisor_is_nonnegative:


unsigned_division:
	/* Initialize the unsigned-division loop. */
	movi r13, 0	/* remainder = 0 */

	/* Now
	* r3 = dividend : quotient
	* r4 = 0x25 for div, 0x24 for divu
	* r5 = divisor
	* r13 = remainder
	* r14 = loop counter (already initialized to 32)
	* r17 = MSB contains sign of quotient
	*/


	/*
	*   for (count = 32; count > 0; --count)
	*   {
	*/
divide_loop:

	/*
	*       Division:
	*
	*       (remainder:dividend:quotient) <<= 1;
	*/
	slli r13, r13, 1
	cmplt r7, r3, zero	/* r7 = MSB of r3 */
	or r13, r13, r7
	slli r3, r3, 1


	/*
	*       if (remainder >= divisor)
	*       {
	*           set LSB of quotient
	*           remainder -= divisor;
	*       }
	*/
	bltu r13, r5, div_skip
	ori r3, r3, 1
	sub r13, r13, r5
div_skip:

	/*
	*   }
	*/
	subi r14, r14, 1
	bne r14, zero, divide_loop


	/* Now
	* r3 = quotient
	* r4 = 0x25 for div, 0x24 for divu
	* r6 = 4*C
	* r17 = MSB contains sign of quotient
	*/


	/*
	*  Conditionally negate signed quotient.  If quotient is unsigned,
	*  the sign already is initialized to 0.
	*/
	bge r17, zero, quotient_is_nonnegative
	sub r3, zero, r3		/* -r3 */
quotient_is_nonnegative:


	/*
	*  Final quotient is in r3.
	*/
	add r6, r6, sp
	stw r3, 0(r6)	/* write quotient to stack */
	br restore_registers




	/* MULTIPLICATION
	*
	* A "product" is the number that one gets by summing a "multiplicand"
	* several times.  The "multiplier" specifies the number of copies of the
	* multiplicand that are summed.
	*
	* Actual multiplication algorithms don't use repeated addition, however.
	* Shift-and-add algorithms get the same answer as repeated addition, and
	* they are faster.  To compute the lower half of a product (pppp below)
	* one shifts the product left before adding in each of the partial
	* products (a * mmmm) through (d * mmmm).
	*
	* To compute the upper half of a product (PPPP below), one adds in the
	* partial products (d * mmmm) through (a * mmmm), each time following
	* the add by a right shift of the product.
	*
	*     mmmm
	*   * abcd
	*   ------
	*     ####  = d * mmmm
	*    ####   = c * mmmm
	*   ####    = b * mmmm
	*  ####     = a * mmmm
	* --------
	* PPPPpppp
	*
	* The example above shows 4 partial products.  Computing actual Nios II
	* products requires 32 partials.
	*
	* It is possible to compute the result of mulxsu from the result of
	* mulxuu because the only difference between the results of these two
	* opcodes is the value of the partial product associated with the sign
	* bit of rA.
	*
	*   mulxsu = mulxuu - (rA < 0) ? rB : 0;
	*
	* It is possible to compute the result of mulxss from the result of
	* mulxsu because the only difference between the results of these two
	* opcodes is the value of the partial product associated with the sign
	* bit of rB.
	*
	*   mulxss = mulxsu - (rB < 0) ? rA : 0;
	*
	*/

mul_immed:
	/* Opcode is muli.  Change it into mul for remainder of algorithm. */
	mov r6, r5		/* Field B is dest register, not field C. */
	mov r5, r4		/* Field IMM16 is src2, not field B. */
	movi r4, 0x27		/* OPX of mul is 0x27 */

multiply:
	/* Initialize the multiplication loop. */
	movi r9, 0	/* mul_product    = 0 */
	movi r10, 0	/* mulxuu_product = 0 */
	mov r11, r5	/* save original multiplier for mulxsu and mulxss */
	mov r12, r5	/* mulxuu_multiplier (will be shifted) */
	movi r16, 1	/* used to create "rori B,A,1" from "ror B,A,r16" */

	/* Now
	* r3 = multiplicand
	* r5 = mul_multiplier
	* r6 = 4 * dest_register (used later as offset to sp)
	* r7 = temp
	* r9 = mul_product
	* r10 = mulxuu_product
	* r11 = original multiplier
	* r12 = mulxuu_multiplier
	* r14 = loop counter (already initialized)
	* r16 = 1
	*/


	/*
	*   for (count = 32; count > 0; --count)
	*   {
	*/
multiply_loop:

	/*
	*       mul_product <<= 1;
	*       lsb = multiplier & 1;
	*/
	slli r9, r9, 1
	andi r7, r12, 1

	/*
	*       if (lsb == 1)
	*       {
	*           mulxuu_product += multiplicand;
	*       }
	*
	* When lsb == 0 the branch is taken with r7 == 0, which doubles as
	* "no carry" for the OR into mulxuu_product further down.
	*/
	beq r7, zero, mulx_skip
	add r10, r10, r3
	cmpltu r7, r10, r3 /* Save the carry from the MSB of mulxuu_product. */
	ror r7, r7, r16	/* r7 = 0x80000000 on carry, or else 0x00000000 */
mulx_skip:

	/*
	*       if (MSB of mul_multiplier == 1)
	*       {
	*           mul_product += multiplicand;
	*       }
	*/
	bge r5, zero, mul_skip
	add r9, r9, r3
mul_skip:

	/*
	*       mulxuu_product >>= 1;           logical shift
	*       mul_multiplier <<= 1;           done with MSB
	*       mulx_multiplier >>= 1;          done with LSB
	*/
	srli r10, r10, 1
	or r10, r10, r7		/* OR in the saved carry bit. */
	slli r5, r5, 1
	srli r12, r12, 1


	/*
	*   }
	*/
	subi r14, r14, 1
	bne r14, zero, multiply_loop


	/*
	*  Multiply emulation loop done.
	*/

	/* Now
	* r3 = multiplicand
	* r4 = OPX
	* r6 = 4 * dest_register (used later as offset to sp)
	* r7 = temp
	* r9 = mul_product
	* r10 = mulxuu_product
	* r11 = original multiplier
	*/


	/* Calculate address for result from 4 * dest_register */
	add r6, r6, sp


	/*
	* Select/compute the result based on OPX.
	*/


	/* OPX == mul?  Then store. */
	xori r7, r4, 0x27
	beq r7, zero, store_product

	/* It's one of the mulx.. opcodes.  Move over the result. */
	mov r9, r10

	/* OPX == mulxuu?  Then store. */
	xori r7, r4, 0x07
	beq r7, zero, store_product

	/* Compute mulxsu
	 *
	 * mulxsu = mulxuu - (rA < 0) ? rB : 0;
	 */
	bge r3, zero, mulxsu_skip
	sub r9, r9, r11
mulxsu_skip:

	/* OPX == mulxsu?  Then store. */
	xori r7, r4, 0x17
	beq r7, zero, store_product

	/* Compute mulxss
	 *
	 * mulxss = mulxsu - (rB < 0) ? rA : 0;
	 */
	bge r11,zero,mulxss_skip
	sub r9, r9, r3
mulxss_skip:
	/* At this point, assume that OPX is mulxss, so store. */


store_product:
	stw r9, 0(r6)


restore_registers:
	/*
	* Put estatus back first, while r5 is still scratch; it comes from
	* its dedicated slot, which no destination register can alias.
	*/
	ldw r5, INSNEMU_ESTATUS(sp)
	wrctl estatus, r5

			/* No need to restore r0. */
	ldw r1, 4(sp)
	ldw r2, 8(sp)
	ldw r3, 12(sp)
	ldw r4, 16(sp)
	ldw r5, 20(sp)
	ldw r6, 24(sp)
	ldw r7, 28(sp)
	ldw r8, 32(sp)
	ldw r9, 36(sp)
	ldw r10, 40(sp)
	ldw r11, 44(sp)
	ldw r12, 48(sp)
	ldw r13, 52(sp)
	ldw r14, 56(sp)
	ldw r15, 60(sp)
	ldw r16, 64(sp)
	ldw r17, 68(sp)
	ldw r18, 72(sp)
	ldw r19, 76(sp)
	ldw r20, 80(sp)
	ldw r21, 84(sp)
	ldw r22, 88(sp)
	ldw r23, 92(sp)
			/* Does not need to restore et */
	ldw bt, 100(sp)		/* r25 */
	ldw gp, 104(sp)		/* r26 */

	ldw fp, 112(sp)		/* r28 */
	ldw ea, 116(sp)		/* r29 */
	ldw ba, 120(sp)		/* r30 */
	ldw ra, 124(sp)		/* r31 */
	ldw sp, 108(sp)	/* last restore sp (r27); also releases the frame */
	eret

.set at
.set break
581