1 #include "mips_arch.h"
2 
/*
 * Divide helper macros.  On MIPS R6 the separate divide step expands
 * to nothing and mfqt/mfrm issue the three-operand divide/modulo that
 * writes the quotient/remainder straight into rd.  On earlier ISAs the
 * divide is issued with $0 as destination and mfqt/mfrm read the
 * quotient/remainder back with mflo/mfhi.
 */
3 #if defined(_MIPS_ARCH_MIPS64R6)
4 # define ddivu(rs,rt)
5 # define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
6 # define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
7 #elif defined(_MIPS_ARCH_MIPS32R6)
8 # define divu(rs,rt)
9 # define mfqt(rd,rs,rt)	divu	rd,rs,rt
10 # define mfrm(rd,rs,rt)	modu	rd,rs,rt
11 #else
12 # define ddivu(rs,rt)	ddivu	$0,rs,rt
13 # define mfqt(rd,rs,rt)	mflo	rd
14 # define mfrm(rd,rs,rt)	mfhi	rd
15 #endif
16 
/* Identification strings, emitted into read-only data. */
17 .rdata
18 .asciiz	"mips3.s, Version 1.2"
19 .asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
20 
/* The code below uses $1 explicitly as a scratch register, hence noat. */
21 .text
22 .set	noat
23 
/*
 * bn_mul_add_words: public entry point.
 * In:  $4 = pointer to the words that are read and updated in place,
 *      $5 = pointer to the source words, $6 = word count,
 *      $7 = multiplier word.
 * Out: $2 = final carry word (0 when the count is not positive); the
 *      same value is also copied into $4.
 * A positive count transfers to bn_mul_add_words_internal; $2 is
 * cleared in the branch delay slot, so it is zero on both paths.
 */
24 .align	5
25 .globl	bn_mul_add_words
26 .ent	bn_mul_add_words
27 bn_mul_add_words:
28 	.set	noreorder
29 	bgtz	$6,bn_mul_add_words_internal
30 	move	$2,$0
31 	jr	$31
32 	move	$4,$2
33 .end	bn_mul_add_words
34 
/*
 * bn_mul_add_words_internal: for each of the $6 words computes
 *     [$4][i] = low half of ([$4][i] + [$5][i] * $7 + carry)
 * and keeps the high half plus the addition overflows as the new carry
 * in $2.  Reached from bn_mul_add_words with $6 > 0 and $2 = 0.
 * Scratch: $1, $3 (constant -4), $8-$15.  The parenthesised dmultu /
 * mflo / mfhi forms are macros (presumably from mips_arch.h).
 * The main loop handles four words per iteration; the multiply for the
 * next word is issued before the current word is stored.  The tail
 * handles the remaining one to three words.
 * Returns the carry in $2, also copied into $4.
 */
35 .align	5
36 .ent	bn_mul_add_words_internal
37 bn_mul_add_words_internal:
38 	.set	reorder
39 	li	$3,-4
40 	and	$8,$6,$3
41 	beqz	$8,.L_bn_mul_add_words_tail
42 
43 .L_bn_mul_add_words_loop:
44 	ld	$12,0($5)
45 	dmultu	($12,$7)
46 	ld	$13,0($4)
47 	ld	$14,8($5)
48 	ld	$15,8($4)
49 	ld	$8,2*8($5)
50 	ld	$9,2*8($4)
51 	daddu	$13,$2
52 	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
53 				# values", but it seems to work fine
54 				# even on 64-bit registers.
55 	mflo	($1,$12,$7)
56 	mfhi	($12,$12,$7)
57 	daddu	$13,$1
58 	daddu	$2,$12
59 	 dmultu	($14,$7)
60 	sltu	$1,$13,$1
61 	sd	$13,0($4)
62 	daddu	$2,$1
63 
64 	ld	$10,3*8($5)
65 	ld	$11,3*8($4)
66 	daddu	$15,$2
67 	sltu	$2,$15,$2
68 	mflo	($1,$14,$7)
69 	mfhi	($14,$14,$7)
70 	daddu	$15,$1
71 	daddu	$2,$14
72 	 dmultu	($8,$7)
73 	sltu	$1,$15,$1
74 	sd	$15,8($4)
75 	daddu	$2,$1
76 
/* Count and both pointers advance here; later stores use negative offsets. */
77 	subu	$6,4
78 	daddu $4,4*8
79 	daddu $5,4*8
80 	daddu	$9,$2
81 	sltu	$2,$9,$2
82 	mflo	($1,$8,$7)
83 	mfhi	($8,$8,$7)
84 	daddu	$9,$1
85 	daddu	$2,$8
86 	 dmultu	($10,$7)
87 	sltu	$1,$9,$1
88 	sd	$9,-2*8($4)
89 	daddu	$2,$1
90 
91 
92 	and	$8,$6,$3
93 	daddu	$11,$2
94 	sltu	$2,$11,$2
95 	mflo	($1,$10,$7)
96 	mfhi	($10,$10,$7)
97 	daddu	$11,$1
98 	daddu	$2,$10
99 	sltu	$1,$11,$1
100 	sd	$11,-8($4)
/* The last carry addition sits in the delay slot of the loop branch. */
101 	.set	noreorder
102 	bgtz	$8,.L_bn_mul_add_words_loop
103 	daddu	$2,$1
104 
105 	beqz	$6,.L_bn_mul_add_words_return
106 	nop
107 
108 .L_bn_mul_add_words_tail:
109 	.set	reorder
110 	ld	$12,0($5)
111 	dmultu	($12,$7)
112 	ld	$13,0($4)
113 	subu	$6,1
114 	daddu	$13,$2
115 	sltu	$2,$13,$2
116 	mflo	($1,$12,$7)
117 	mfhi	($12,$12,$7)
118 	daddu	$13,$1
119 	daddu	$2,$12
120 	sltu	$1,$13,$1
121 	sd	$13,0($4)
122 	daddu	$2,$1
123 	beqz	$6,.L_bn_mul_add_words_return
124 
125 	ld	$12,8($5)
126 	dmultu	($12,$7)
127 	ld	$13,8($4)
128 	subu	$6,1
129 	daddu	$13,$2
130 	sltu	$2,$13,$2
131 	mflo	($1,$12,$7)
132 	mfhi	($12,$12,$7)
133 	daddu	$13,$1
134 	daddu	$2,$12
135 	sltu	$1,$13,$1
136 	sd	$13,8($4)
137 	daddu	$2,$1
138 	beqz	$6,.L_bn_mul_add_words_return
139 
140 	ld	$12,2*8($5)
141 	dmultu	($12,$7)
142 	ld	$13,2*8($4)
143 	daddu	$13,$2
144 	sltu	$2,$13,$2
145 	mflo	($1,$12,$7)
146 	mfhi	($12,$12,$7)
147 	daddu	$13,$1
148 	daddu	$2,$12
149 	sltu	$1,$13,$1
150 	sd	$13,2*8($4)
151 	daddu	$2,$1
152 
153 .L_bn_mul_add_words_return:
154 	.set	noreorder
155 	jr	$31
156 	move	$4,$2
157 .end	bn_mul_add_words_internal
158 
/*
 * bn_mul_words: public entry point.
 * In:  $4 = pointer to the result words, $5 = pointer to the source
 *      words, $6 = word count, $7 = multiplier word.
 * Out: $2 = final carry word (0 when the count is not positive); the
 *      same value is also copied into $4.
 * A positive count transfers to bn_mul_words_internal; $2 is cleared
 * in the branch delay slot, so it is zero on both paths.
 */
159 .align	5
160 .globl	bn_mul_words
161 .ent	bn_mul_words
162 bn_mul_words:
163 	.set	noreorder
164 	bgtz	$6,bn_mul_words_internal
165 	move	$2,$0
166 	jr	$31
167 	move	$4,$2
168 .end	bn_mul_words
169 
/*
 * bn_mul_words_internal: for each of the $6 words computes
 *     [$4][i] = low half of ([$5][i] * $7 + carry)
 * and sets the new carry in $2 to the high half plus the overflow of
 * the addition.  Reached from bn_mul_words with $6 > 0 and $2 = 0.
 * Unlike bn_mul_add_words_internal, the previous contents of [$4] are
 * not read.
 * Scratch: $1, $3 (constant -4), $8-$15.  Four words per main-loop
 * iteration, then a tail for the remaining one to three words.
 * Returns the carry in $2, also copied into $4.
 */
170 .align	5
171 .ent	bn_mul_words_internal
172 bn_mul_words_internal:
173 	.set	reorder
174 	li	$3,-4
175 	and	$8,$6,$3
176 	beqz	$8,.L_bn_mul_words_tail
177 
178 .L_bn_mul_words_loop:
179 	ld	$12,0($5)
180 	dmultu	($12,$7)
181 	ld	$14,8($5)
182 	ld	$8,2*8($5)
183 	ld	$10,3*8($5)
184 	mflo	($1,$12,$7)
185 	mfhi	($12,$12,$7)
186 	daddu	$2,$1
187 	sltu	$13,$2,$1
188 	 dmultu	($14,$7)
189 	sd	$2,0($4)
190 	daddu	$2,$13,$12
191 
/* Count and both pointers advance here; later stores use negative offsets. */
192 	subu	$6,4
193 	daddu $4,4*8
194 	daddu $5,4*8
195 	mflo	($1,$14,$7)
196 	mfhi	($14,$14,$7)
197 	daddu	$2,$1
198 	sltu	$15,$2,$1
199 	 dmultu	($8,$7)
200 	sd	$2,-3*8($4)
201 	daddu	$2,$15,$14
202 
203 	mflo	($1,$8,$7)
204 	mfhi	($8,$8,$7)
205 	daddu	$2,$1
206 	sltu	$9,$2,$1
207 	 dmultu	($10,$7)
208 	sd	$2,-2*8($4)
209 	daddu	$2,$9,$8
210 
211 	and	$8,$6,$3
212 	mflo	($1,$10,$7)
213 	mfhi	($10,$10,$7)
214 	daddu	$2,$1
215 	sltu	$11,$2,$1
216 	sd	$2,-8($4)
/* The new carry is formed in the delay slot of the loop branch. */
217 	.set	noreorder
218 	bgtz	$8,.L_bn_mul_words_loop
219 	daddu	$2,$11,$10
220 
221 	beqz	$6,.L_bn_mul_words_return
222 	nop
223 
224 .L_bn_mul_words_tail:
225 	.set	reorder
226 	ld	$12,0($5)
227 	dmultu	($12,$7)
228 	subu	$6,1
229 	mflo	($1,$12,$7)
230 	mfhi	($12,$12,$7)
231 	daddu	$2,$1
232 	sltu	$13,$2,$1
233 	sd	$2,0($4)
234 	daddu	$2,$13,$12
235 	beqz	$6,.L_bn_mul_words_return
236 
237 	ld	$12,8($5)
238 	dmultu	($12,$7)
239 	subu	$6,1
240 	mflo	($1,$12,$7)
241 	mfhi	($12,$12,$7)
242 	daddu	$2,$1
243 	sltu	$13,$2,$1
244 	sd	$2,8($4)
245 	daddu	$2,$13,$12
246 	beqz	$6,.L_bn_mul_words_return
247 
248 	ld	$12,2*8($5)
249 	dmultu	($12,$7)
250 	mflo	($1,$12,$7)
251 	mfhi	($12,$12,$7)
252 	daddu	$2,$1
253 	sltu	$13,$2,$1
254 	sd	$2,2*8($4)
255 	daddu	$2,$13,$12
256 
257 .L_bn_mul_words_return:
258 	.set	noreorder
259 	jr	$31
260 	move	$4,$2
261 .end	bn_mul_words_internal
262 
/*
 * bn_sqr_words: public entry point.
 * In:  $4 = pointer to the result words (two per input word),
 *      $5 = pointer to the source words, $6 = word count.
 * A count <= 0 returns immediately; otherwise control transfers to
 * bn_sqr_words_internal.  $2 is cleared in the branch delay slot and
 * copied into $4 on the early-return path.
 */
263 .align	5
264 .globl	bn_sqr_words
265 .ent	bn_sqr_words
266 bn_sqr_words:
267 	.set	noreorder
268 	bgtz	$6,bn_sqr_words_internal
269 	move	$2,$0
270 	jr	$31
271 	move	$4,$2
272 .end	bn_sqr_words
273 
/*
 * bn_sqr_words_internal: for each of the $6 source words at $5, stores
 * the double-width square as two consecutive words at $4 (low half
 * first, then high half).  No carry is propagated between words.
 * Reached from bn_sqr_words with $6 > 0.
 * Scratch: $3 (constant -4), $8-$15.  $2 is never written here, so the
 * value set by the caller is what gets copied into $4 on return.
 * Four source words (eight result words) per main-loop iteration, then
 * a tail for the remaining one to three source words.
 */
274 .align	5
275 .ent	bn_sqr_words_internal
276 bn_sqr_words_internal:
277 	.set	reorder
278 	li	$3,-4
279 	and	$8,$6,$3
280 	beqz	$8,.L_bn_sqr_words_tail
281 
282 .L_bn_sqr_words_loop:
283 	ld	$12,0($5)
284 	dmultu	($12,$12)
285 	ld	$14,8($5)
286 	ld	$8,2*8($5)
287 	ld	$10,3*8($5)
288 	mflo	($13,$12,$12)
289 	mfhi	($12,$12,$12)
290 	sd	$13,0($4)
291 	sd	$12,8($4)
292 
/* The result pointer advances by 8 words, the source pointer by 4. */
293 	dmultu	($14,$14)
294 	subu	$6,4
295 	daddu $4,8*8
296 	daddu $5,4*8
297 	mflo	($15,$14,$14)
298 	mfhi	($14,$14,$14)
299 	sd	$15,-6*8($4)
300 	sd	$14,-5*8($4)
301 
302 	dmultu	($8,$8)
303 	mflo	($9,$8,$8)
304 	mfhi	($8,$8,$8)
305 	sd	$9,-4*8($4)
306 	sd	$8,-3*8($4)
307 
308 
309 	dmultu	($10,$10)
310 	and	$8,$6,$3
311 	mflo	($11,$10,$10)
312 	mfhi	($10,$10,$10)
313 	sd	$11,-2*8($4)
314 
/* The final store of the iteration sits in the branch delay slot. */
315 	.set	noreorder
316 	bgtz	$8,.L_bn_sqr_words_loop
317 	sd	$10,-8($4)
318 
319 	beqz	$6,.L_bn_sqr_words_return
320 	nop
321 
322 .L_bn_sqr_words_tail:
323 	.set	reorder
324 	ld	$12,0($5)
325 	dmultu	($12,$12)
326 	subu	$6,1
327 	mflo	($13,$12,$12)
328 	mfhi	($12,$12,$12)
329 	sd	$13,0($4)
330 	sd	$12,8($4)
331 	beqz	$6,.L_bn_sqr_words_return
332 
333 	ld	$12,8($5)
334 	dmultu	($12,$12)
335 	subu	$6,1
336 	mflo	($13,$12,$12)
337 	mfhi	($12,$12,$12)
338 	sd	$13,2*8($4)
339 	sd	$12,3*8($4)
340 	beqz	$6,.L_bn_sqr_words_return
341 
342 	ld	$12,2*8($5)
343 	dmultu	($12,$12)
344 	mflo	($13,$12,$12)
345 	mfhi	($12,$12,$12)
346 	sd	$13,4*8($4)
347 	sd	$12,5*8($4)
348 
349 .L_bn_sqr_words_return:
350 	.set	noreorder
351 	jr	$31
352 	move	$4,$2
353 
354 .end	bn_sqr_words_internal
355 
/*
 * bn_add_words: public entry point.
 * In:  $4 = pointer to the result words, $5 and $6 = pointers to the
 *      two source operands, $7 = word count.
 * Out: $2 = carry out of the most significant word (0 when the count
 *      is not positive); the same value is also copied into $4.
 * A positive count transfers to bn_add_words_internal; $2 is cleared
 * in the branch delay slot, so it is zero on both paths.
 */
356 .align	5
357 .globl	bn_add_words
358 .ent	bn_add_words
359 bn_add_words:
360 	.set	noreorder
361 	bgtz	$7,bn_add_words_internal
362 	move	$2,$0
363 	jr	$31
364 	move	$4,$2
365 .end	bn_add_words
366 
/*
 * bn_add_words_internal: for each of the $7 words computes
 *     [$4][i] = [$5][i] + [$6][i] + carry
 * with the carry kept in $2.  Reached from bn_add_words with $7 > 0
 * and $2 = 0.
 * Per word: $24 or $25 receives the overflow of the operand addition,
 * $2 receives the overflow of adding the incoming carry, and the two
 * are summed to form the outgoing carry.
 * Scratch: $1, $3 (constant -4), $8-$15, $24, $25.
 * Four words per main-loop iteration, then a tail for the remaining
 * one to three words.  Returns the carry in $2, also copied into $4.
 */
367 .align	5
368 .ent	bn_add_words_internal
369 bn_add_words_internal:
370 	.set	reorder
371 	li	$3,-4
372 	and	$1,$7,$3
373 	beqz	$1,.L_bn_add_words_tail
374 
/* Loads are interleaved with the count and pointer updates, so operands
   fetched after a pointer was advanced use negative offsets. */
375 .L_bn_add_words_loop:
376 	ld	$12,0($5)
377 	ld	$8,0($6)
378 	subu	$7,4
379 	ld	$13,8($5)
380 	and	$1,$7,$3
381 	ld	$14,2*8($5)
382 	daddu $6,4*8
383 	ld	$15,3*8($5)
384 	daddu $4,4*8
385 	ld	$9,-3*8($6)
386 	daddu $5,4*8
387 	ld	$10,-2*8($6)
388 	ld	$11,-8($6)
389 	daddu	$8,$12
390 	sltu	$24,$8,$12
391 	daddu	$12,$8,$2
392 	sltu	$2,$12,$8
393 	sd	$12,-4*8($4)
394 	daddu	$2,$24
395 
396 	daddu	$9,$13
397 	sltu	$25,$9,$13
398 	daddu	$13,$9,$2
399 	sltu	$2,$13,$9
400 	sd	$13,-3*8($4)
401 	daddu	$2,$25
402 
403 	daddu	$10,$14
404 	sltu	$24,$10,$14
405 	daddu	$14,$10,$2
406 	sltu	$2,$14,$10
407 	sd	$14,-2*8($4)
408 	daddu	$2,$24
409 
410 	daddu	$11,$15
411 	sltu	$25,$11,$15
412 	daddu	$15,$11,$2
413 	sltu	$2,$15,$11
414 	sd	$15,-8($4)
415 
/* The last carry addition sits in the delay slot of the loop branch. */
416 	.set	noreorder
417 	bgtz	$1,.L_bn_add_words_loop
418 	daddu	$2,$25
419 
420 	beqz	$7,.L_bn_add_words_return
421 	nop
422 
423 .L_bn_add_words_tail:
424 	.set	reorder
425 	ld	$12,0($5)
426 	ld	$8,0($6)
427 	daddu	$8,$12
428 	subu	$7,1
429 	sltu	$24,$8,$12
430 	daddu	$12,$8,$2
431 	sltu	$2,$12,$8
432 	sd	$12,0($4)
433 	daddu	$2,$24
434 	beqz	$7,.L_bn_add_words_return
435 
436 	ld	$13,8($5)
437 	ld	$9,8($6)
438 	daddu	$9,$13
439 	subu	$7,1
440 	sltu	$25,$9,$13
441 	daddu	$13,$9,$2
442 	sltu	$2,$13,$9
443 	sd	$13,8($4)
444 	daddu	$2,$25
445 	beqz	$7,.L_bn_add_words_return
446 
447 	ld	$14,2*8($5)
448 	ld	$10,2*8($6)
449 	daddu	$10,$14
450 	sltu	$24,$10,$14
451 	daddu	$14,$10,$2
452 	sltu	$2,$14,$10
453 	sd	$14,2*8($4)
454 	daddu	$2,$24
455 
456 .L_bn_add_words_return:
457 	.set	noreorder
458 	jr	$31
459 	move	$4,$2
460 
461 .end	bn_add_words_internal
462 
/*
 * bn_sub_words: public entry point.
 * In:  $4 = pointer to the result words, $5 = pointer to the minuend
 *      words, $6 = pointer to the subtrahend words, $7 = word count.
 * Out: $2 = borrow out of the most significant word (0 when the count
 *      is not positive).
 * A positive count transfers to bn_sub_words_internal; $2 is cleared
 * in the branch delay slot.  The early-return path clears $4 from $0
 * directly, which gives the same value as copying the zeroed $2.
 */
463 .align	5
464 .globl	bn_sub_words
465 .ent	bn_sub_words
466 bn_sub_words:
467 	.set	noreorder
468 	bgtz	$7,bn_sub_words_internal
469 	move	$2,$0
470 	jr	$31
471 	move	$4,$0
472 .end	bn_sub_words
473 
/*
 * bn_sub_words_internal: for each of the $7 words computes
 *     [$4][i] = [$5][i] - [$6][i] - borrow
 * with the borrow kept in $2.  Reached from bn_sub_words with $7 > 0
 * and $2 = 0.
 * Per word: $24 or $25 receives the borrow of the operand subtraction
 * (minuend below subtrahend), $2 receives the borrow of subtracting
 * the incoming borrow (result above the intermediate difference), and
 * the two are summed to form the outgoing borrow.
 * Scratch: $1, $3 (constant -4), $8-$15, $24, $25.
 * Four words per main-loop iteration, then a tail for the remaining
 * one to three words.  Returns the borrow in $2, also copied into $4.
 */
474 .align	5
475 .ent	bn_sub_words_internal
476 bn_sub_words_internal:
477 	.set	reorder
478 	li	$3,-4
479 	and	$1,$7,$3
480 	beqz	$1,.L_bn_sub_words_tail
481 
/* Loads are interleaved with the count and pointer updates, so operands
   fetched after a pointer was advanced use negative offsets. */
482 .L_bn_sub_words_loop:
483 	ld	$12,0($5)
484 	ld	$8,0($6)
485 	subu	$7,4
486 	ld	$13,8($5)
487 	and	$1,$7,$3
488 	ld	$14,2*8($5)
489 	daddu $6,4*8
490 	ld	$15,3*8($5)
491 	daddu $4,4*8
492 	ld	$9,-3*8($6)
493 	daddu $5,4*8
494 	ld	$10,-2*8($6)
495 	ld	$11,-8($6)
496 	sltu	$24,$12,$8
497 	dsubu	$8,$12,$8
498 	dsubu	$12,$8,$2
499 	sgtu	$2,$12,$8
500 	sd	$12,-4*8($4)
501 	daddu	$2,$24
502 
503 	sltu	$25,$13,$9
504 	dsubu	$9,$13,$9
505 	dsubu	$13,$9,$2
506 	sgtu	$2,$13,$9
507 	sd	$13,-3*8($4)
508 	daddu	$2,$25
509 
510 
511 	sltu	$24,$14,$10
512 	dsubu	$10,$14,$10
513 	dsubu	$14,$10,$2
514 	sgtu	$2,$14,$10
515 	sd	$14,-2*8($4)
516 	daddu	$2,$24
517 
518 	sltu	$25,$15,$11
519 	dsubu	$11,$15,$11
520 	dsubu	$15,$11,$2
521 	sgtu	$2,$15,$11
522 	sd	$15,-8($4)
523 
/* The last borrow addition sits in the delay slot of the loop branch. */
524 	.set	noreorder
525 	bgtz	$1,.L_bn_sub_words_loop
526 	daddu	$2,$25
527 
528 	beqz	$7,.L_bn_sub_words_return
529 	nop
530 
531 .L_bn_sub_words_tail:
532 	.set	reorder
533 	ld	$12,0($5)
534 	ld	$8,0($6)
535 	subu	$7,1
536 	sltu	$24,$12,$8
537 	dsubu	$8,$12,$8
538 	dsubu	$12,$8,$2
539 	sgtu	$2,$12,$8
540 	sd	$12,0($4)
541 	daddu	$2,$24
542 	beqz	$7,.L_bn_sub_words_return
543 
544 	ld	$13,8($5)
545 	subu	$7,1
546 	ld	$9,8($6)
547 	sltu	$25,$13,$9
548 	dsubu	$9,$13,$9
549 	dsubu	$13,$9,$2
550 	sgtu	$2,$13,$9
551 	sd	$13,8($4)
552 	daddu	$2,$25
553 	beqz	$7,.L_bn_sub_words_return
554 
555 	ld	$14,2*8($5)
556 	ld	$10,2*8($6)
557 	sltu	$24,$14,$10
558 	dsubu	$10,$14,$10
559 	dsubu	$14,$10,$2
560 	sgtu	$2,$14,$10
561 	sd	$14,2*8($4)
562 	daddu	$2,$24
563 
564 .L_bn_sub_words_return:
565 	.set	noreorder
566 	jr	$31
567 	move	$4,$2
568 .end	bn_sub_words_internal
569 
570 #if 0
571 /*
572  * The bn_div_3_words entry point is re-used for constant-time interface.
573  * Implementation is retained as historical reference.
574  */
/*
 * Everything up to the matching #endif is excluded from the build.
 * As written: $4 points at the top word of a multi-word value, $5 is a
 * second operand word and $6 the divisor word.  If the top word equals
 * $6 the routine returns -1 in $2; otherwise it calls
 * bn_div_words_internal on the top two words and then, in the inner
 * loop, decrements the estimated quotient until the comparison of its
 * product with $10 against the three words passes.
 */
575 .align 5
576 .globl	bn_div_3_words
577 .ent	bn_div_3_words
578 bn_div_3_words:
579 	.set	noreorder
580 	move	$7,$4		# we know that bn_div_words does not
581 				# touch $7, $10, $11 and preserves $6
582 				# so that we can save two arguments
583 				# and return address in registers
584 				# instead of stack:-)
585 
586 	ld	$4,($7)
587 	move	$10,$5
588 	bne	$4,$6,bn_div_3_words_internal
589 	ld	$5,-8($7)
590 	li	$2,-1
591 	jr	$31
592 	move	$4,$2
593 .end	bn_div_3_words
594 
595 .align	5
596 .ent	bn_div_3_words_internal
597 bn_div_3_words_internal:
598 	.set	reorder
/* The return address is parked in $11 around the nested call. */
599 	move	$11,$31
600 	bal	bn_div_words_internal
601 	move	$31,$11
602 	dmultu	($10,$2)
603 	ld	$14,-2*8($7)
604 	move	$8,$0
605 	mfhi	($13,$10,$2)
606 	mflo	($12,$10,$2)
607 	sltu	$24,$13,$5
608 .L_bn_div_3_words_inner_loop:
609 	bnez	$24,.L_bn_div_3_words_inner_loop_done
610 	sgeu	$1,$14,$12
611 	seq	$25,$13,$5
612 	and	$1,$25
613 	sltu	$15,$12,$10
614 	daddu	$5,$6
615 	dsubu	$13,$15
616 	dsubu	$12,$10
617 	sltu	$24,$13,$5
618 	sltu	$8,$5,$6
619 	or	$24,$8
/* The decrement in the delay slot always executes; the increment that
   follows undoes it when the loop is left by falling through. */
620 	.set	noreorder
621 	beqz	$1,.L_bn_div_3_words_inner_loop
622 	dsubu	$2,1
623 	daddu	$2,1
624 	.set	reorder
625 .L_bn_div_3_words_inner_loop_done:
626 	.set	noreorder
627 	jr	$31
628 	move	$4,$2
629 .end	bn_div_3_words_internal
630 #endif
631 
/*
 * bn_div_words: public entry point.
 * In:  $4 = high word of the dividend, $5 = low word of the dividend,
 *      $6 = divisor word.
 * A non-zero divisor transfers to bn_div_words_internal.  A zero
 * divisor returns -1 in $2 (also copied into $4) instead of trapping.
 * The load of -1 sits in the branch delay slot and therefore executes
 * on both paths; the internal routine overwrites $2 later.
 */
632 .align	5
633 .globl	bn_div_words
634 .ent	bn_div_words
635 bn_div_words:
636 	.set	noreorder
637 	bnez	$6,bn_div_words_internal
638 	li	$2,-1		# I would rather signal div-by-zero
639 				# which can be done with 'break 7'
640 	jr	$31
641 	move	$4,$2
642 .end	bn_div_words
643 
/*
 * bn_div_words_internal: divides the two-word value $4:$5 (high:low)
 * by the word in $6.  Reached from bn_div_words with $6 != 0 and the
 * assembler still in noreorder mode.
 * Out: $2 = quotient (also copied into $4), $3 = remainder (also
 *      copied into $5); $6 is shifted back to its original value.
 * Steps:
 *   1. Normalise: shift $6 left until its top bit is set, counting the
 *      shift in $25.  If any dividend bits would be shifted out of $4,
 *      execute break 6.  Shift the dividend by the same amount.
 *   2. Reduce $4 by $6 once if it is not smaller.
 *   3. Twice: estimate a half-word quotient digit by dividing $4 by
 *      the upper half of the divisor ($3), or take all ones when the
 *      upper halves are equal, then decrement the digit until its
 *      product with $6 no longer exceeds the current partial dividend.
 *   4. Combine the two digits in $2 and shift the remainder back.
 * Scratch: $1, $8, $9, $12-$15, $24, $25.
 * The branches written with .-4 and .+12 are byte-relative and rely on
 * the exact number of instructions around them.
 */
644 .align	5
645 .ent	bn_div_words_internal
646 bn_div_words_internal:
647 	move	$3,$0
648 	bltz	$6,.L_bn_div_words_body
649 	move	$25,$3
650 	dsll	$6,1
651 	bgtz	$6,.-4
652 	addu	$25,1
653 
654 	.set	reorder
655 	negu	$13,$25
656 	li	$14,-1
657 	dsll	$14,$13
658 	and	$14,$4
659 	dsrl	$1,$5,$13
660 	.set	noreorder
661 	beqz	$14,.+12
662 	nop
663 	break	6		# signal overflow
664 	.set	reorder
665 	dsll	$4,$25
666 	dsll	$5,$25
667 	or	$4,$1
668 .L_bn_div_words_body:
669 	dsrl	$3,$6,4*8	# bits
670 	sgeu	$1,$4,$6
671 	.set	noreorder
672 	beqz	$1,.+12
673 	nop
674 	dsubu	$4,$6
675 	.set	reorder
676 
/* First quotient digit (upper half of the result). */
677 	li	$8,-1
678 	dsrl	$9,$4,4*8	# bits
679 	dsrl	$8,4*8	# q=0xffffffff
680 	beq	$3,$9,.L_bn_div_words_skip_div1
681 	ddivu	($4,$3)
682 	mfqt	($8,$4,$3)
683 .L_bn_div_words_skip_div1:
684 	dmultu	($6,$8)
685 	dsll	$15,$4,4*8	# bits
686 	dsrl	$1,$5,4*8	# bits
687 	or	$15,$1
688 	mflo	($12,$6,$8)
689 	mfhi	($13,$6,$8)
690 .L_bn_div_words_inner_loop1:
691 	sltu	$14,$15,$12
692 	seq	$24,$9,$13
693 	sltu	$1,$9,$13
694 	and	$14,$24
695 	sltu	$2,$12,$6
696 	or	$1,$14
697 	.set	noreorder
698 	beqz	$1,.L_bn_div_words_inner_loop1_done
699 	dsubu	$13,$2
700 	dsubu	$12,$6
701 	b	.L_bn_div_words_inner_loop1
702 	dsubu	$8,1
703 	.set	reorder
704 .L_bn_div_words_inner_loop1_done:
705 
706 	dsll	$5,4*8	# bits
707 	dsubu	$4,$15,$12
708 	dsll	$2,$8,4*8	# bits
709 
/* Second quotient digit (lower half of the result). */
710 	li	$8,-1
711 	dsrl	$9,$4,4*8	# bits
712 	dsrl	$8,4*8	# q=0xffffffff
713 	beq	$3,$9,.L_bn_div_words_skip_div2
714 	ddivu	($4,$3)
715 	mfqt	($8,$4,$3)
716 .L_bn_div_words_skip_div2:
717 	dmultu	($6,$8)
718 	dsll	$15,$4,4*8	# bits
719 	dsrl	$1,$5,4*8	# bits
720 	or	$15,$1
721 	mflo	($12,$6,$8)
722 	mfhi	($13,$6,$8)
723 .L_bn_div_words_inner_loop2:
724 	sltu	$14,$15,$12
725 	seq	$24,$9,$13
726 	sltu	$1,$9,$13
727 	and	$14,$24
728 	sltu	$3,$12,$6
729 	or	$1,$14
730 	.set	noreorder
731 	beqz	$1,.L_bn_div_words_inner_loop2_done
732 	dsubu	$13,$3
733 	dsubu	$12,$6
734 	b	.L_bn_div_words_inner_loop2
735 	dsubu	$8,1
736 	.set	reorder
737 .L_bn_div_words_inner_loop2_done:
738 
739 	dsubu	$4,$15,$12
740 	or	$2,$8
741 	dsrl	$3,$4,$25	# $3 contains remainder if anybody wants it
742 	dsrl	$6,$25		# restore $6
743 
744 	.set	noreorder
745 	move	$5,$3
746 	jr	$31
747 	move	$4,$2
748 .end	bn_div_words_internal
749 
/*
 * bn_mul_comba8: 8-word by 8-word multiplication producing 16 result
 * words, r[0..15] = a[0..7] * b[0..7], computed one result column at a
 * time (see the mul_add_c annotations on each multiply).
 * In:  $4 = r, $5 = a, $6 = b.
 * Register use:
 *   a[0..3] = $12-$15, a[4] = $16, a[5] = $18, a[6] = $20,
 *   a[7] = $5 (overwrites the pointer once the other words are loaded);
 *   b[0..3] = $8-$11,  b[4] = $17, b[5] = $19, b[6] = $21,
 *   b[7] = $6 (overwrites the pointer once the other words are loaded);
 *   $2, $3, $7 rotate through the accumulator roles c1, c2, c3;
 *   $24/$25 = low/high half of the current product; $1 = carry scratch.
 * $16-$21 are saved in a 6*8-byte stack frame and restored on exit.
 * The multiply for the next term is issued before the carry handling
 * of the current term, so the statement order must not be changed.
 */
750 .align	5
751 .globl	bn_mul_comba8
752 .ent	bn_mul_comba8
753 bn_mul_comba8:
754 	.set	noreorder
755 	.frame	$29,6*8,$31
756 	.mask	0x003f0000,-8
757 	dsubu $29,6*8
758 	sd	$21,5*8($29)
759 	sd	$20,4*8($29)
760 	sd	$19,3*8($29)
761 	sd	$18,2*8($29)
762 	sd	$17,1*8($29)
763 	sd	$16,0*8($29)
764 
765 	.set	reorder
766 	ld	$12,0($5)	# If compiled with -mips3 option on
767 				# R5000 box assembler barks on this
768 				# 1ine with "should not have mult/div
769 				# as last instruction in bb (R10K
770 				# bug)" warning. If anybody out there
771 				# has a clue about how to circumvent
772 				# this do send me a note.
773 				#		<appro@fy.chalmers.se>
774 
775 	ld	$8,0($6)
776 	ld	$13,8($5)
777 	ld	$14,2*8($5)
778 	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
779 	ld	$15,3*8($5)
780 	ld	$9,8($6)
781 	ld	$10,2*8($6)
782 	ld	$11,3*8($6)
783 	mflo	($2,$12,$8)
784 	mfhi	($3,$12,$8)
785 
786 	ld	$16,4*8($5)
787 	ld	$18,5*8($5)
788 	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
789 	ld	$20,6*8($5)
790 	ld	$5,7*8($5)
791 	ld	$17,4*8($6)
792 	ld	$19,5*8($6)
793 	mflo	($24,$12,$9)
794 	mfhi	($25,$12,$9)
795 	daddu	$3,$24
796 	sltu	$1,$3,$24
797 	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
798 	daddu	$7,$25,$1
799 	ld	$21,6*8($6)
800 	ld	$6,7*8($6)
801 	sd	$2,0($4)	# r[0]=c1;
802 	mflo	($24,$13,$8)
803 	mfhi	($25,$13,$8)
804 	daddu	$3,$24
805 	sltu	$1,$3,$24
806 	 dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
807 	daddu	$25,$1
808 	daddu	$7,$25
809 	sltu	$2,$7,$25
810 	sd	$3,8($4)	# r[1]=c2;
811 
812 	mflo	($24,$14,$8)
813 	mfhi	($25,$14,$8)
814 	daddu	$7,$24
815 	sltu	$1,$7,$24
816 	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
817 	daddu	$25,$1
818 	daddu	$2,$25
819 	mflo	($24,$13,$9)
820 	mfhi	($25,$13,$9)
821 	daddu	$7,$24
822 	sltu	$1,$7,$24
823 	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
824 	daddu	$25,$1
825 	daddu	$2,$25
826 	sltu	$3,$2,$25
827 	mflo	($24,$12,$10)
828 	mfhi	($25,$12,$10)
829 	daddu	$7,$24
830 	sltu	$1,$7,$24
831 	 dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
832 	daddu	$25,$1
833 	daddu	$2,$25
834 	sltu	$1,$2,$25
835 	daddu	$3,$1
836 	sd	$7,2*8($4)	# r[2]=c3;
837 
838 	mflo	($24,$12,$11)
839 	mfhi	($25,$12,$11)
840 	daddu	$2,$24
841 	sltu	$1,$2,$24
842 	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
843 	daddu	$25,$1
844 	daddu	$3,$25
845 	sltu	$7,$3,$25
846 	mflo	($24,$13,$10)
847 	mfhi	($25,$13,$10)
848 	daddu	$2,$24
849 	sltu	$1,$2,$24
850 	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
851 	daddu	$25,$1
852 	daddu	$3,$25
853 	sltu	$1,$3,$25
854 	daddu	$7,$1
855 	mflo	($24,$14,$9)
856 	mfhi	($25,$14,$9)
857 	daddu	$2,$24
858 	sltu	$1,$2,$24
859 	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
860 	daddu	$25,$1
861 	daddu	$3,$25
862 	sltu	$1,$3,$25
863 	daddu	$7,$1
864 	mflo	($24,$15,$8)
865 	mfhi	($25,$15,$8)
866 	daddu	$2,$24
867 	sltu	$1,$2,$24
868 	 dmultu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
869 	daddu	$25,$1
870 	daddu	$3,$25
871 	sltu	$1,$3,$25
872 	daddu	$7,$1
873 	sd	$2,3*8($4)	# r[3]=c1;
874 
875 	mflo	($24,$16,$8)
876 	mfhi	($25,$16,$8)
877 	daddu	$3,$24
878 	sltu	$1,$3,$24
879 	dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
880 	daddu	$25,$1
881 	daddu	$7,$25
882 	sltu	$2,$7,$25
883 	mflo	($24,$15,$9)
884 	mfhi	($25,$15,$9)
885 	daddu	$3,$24
886 	sltu	$1,$3,$24
887 	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
888 	daddu	$25,$1
889 	daddu	$7,$25
890 	sltu	$1,$7,$25
891 	daddu	$2,$1
892 	mflo	($24,$14,$10)
893 	mfhi	($25,$14,$10)
894 	daddu	$3,$24
895 	sltu	$1,$3,$24
896 	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
897 	daddu	$25,$1
898 	daddu	$7,$25
899 	sltu	$1,$7,$25
900 	daddu	$2,$1
901 	mflo	($24,$13,$11)
902 	mfhi	($25,$13,$11)
903 	daddu	$3,$24
904 	sltu	$1,$3,$24
905 	dmultu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
906 	daddu	$25,$1
907 	daddu	$7,$25
908 	sltu	$1,$7,$25
909 	daddu	$2,$1
910 	mflo	($24,$12,$17)
911 	mfhi	($25,$12,$17)
912 	daddu	$3,$24
913 	sltu	$1,$3,$24
914 	 dmultu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
915 	daddu	$25,$1
916 	daddu	$7,$25
917 	sltu	$1,$7,$25
918 	daddu	$2,$1
919 	sd	$3,4*8($4)	# r[4]=c2;
920 
921 	mflo	($24,$12,$19)
922 	mfhi	($25,$12,$19)
923 	daddu	$7,$24
924 	sltu	$1,$7,$24
925 	dmultu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
926 	daddu	$25,$1
927 	daddu	$2,$25
928 	sltu	$3,$2,$25
929 	mflo	($24,$13,$17)
930 	mfhi	($25,$13,$17)
931 	daddu	$7,$24
932 	sltu	$1,$7,$24
933 	dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
934 	daddu	$25,$1
935 	daddu	$2,$25
936 	sltu	$1,$2,$25
937 	daddu	$3,$1
938 	mflo	($24,$14,$11)
939 	mfhi	($25,$14,$11)
940 	daddu	$7,$24
941 	sltu	$1,$7,$24
942 	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
943 	daddu	$25,$1
944 	daddu	$2,$25
945 	sltu	$1,$2,$25
946 	daddu	$3,$1
947 	mflo	($24,$15,$10)
948 	mfhi	($25,$15,$10)
949 	daddu	$7,$24
950 	sltu	$1,$7,$24
951 	dmultu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
952 	daddu	$25,$1
953 	daddu	$2,$25
954 	sltu	$1,$2,$25
955 	daddu	$3,$1
956 	mflo	($24,$16,$9)
957 	mfhi	($25,$16,$9)
958 	daddu	$7,$24
959 	sltu	$1,$7,$24
960 	dmultu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
961 	daddu	$25,$1
962 	daddu	$2,$25
963 	sltu	$1,$2,$25
964 	daddu	$3,$1
965 	mflo	($24,$18,$8)
966 	mfhi	($25,$18,$8)
967 	daddu	$7,$24
968 	sltu	$1,$7,$24
969 	 dmultu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
970 	daddu	$25,$1
971 	daddu	$2,$25
972 	sltu	$1,$2,$25
973 	daddu	$3,$1
974 	sd	$7,5*8($4)	# r[5]=c3;
975 
976 	mflo	($24,$20,$8)
977 	mfhi	($25,$20,$8)
978 	daddu	$2,$24
979 	sltu	$1,$2,$24
980 	dmultu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
981 	daddu	$25,$1
982 	daddu	$3,$25
983 	sltu	$7,$3,$25
984 	mflo	($24,$18,$9)
985 	mfhi	($25,$18,$9)
986 	daddu	$2,$24
987 	sltu	$1,$2,$24
988 	dmultu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
989 	daddu	$25,$1
990 	daddu	$3,$25
991 	sltu	$1,$3,$25
992 	daddu	$7,$1
993 	mflo	($24,$16,$10)
994 	mfhi	($25,$16,$10)
995 	daddu	$2,$24
996 	sltu	$1,$2,$24
997 	dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
998 	daddu	$25,$1
999 	daddu	$3,$25
1000 	sltu	$1,$3,$25
1001 	daddu	$7,$1
1002 	mflo	($24,$15,$11)
1003 	mfhi	($25,$15,$11)
1004 	daddu	$2,$24
1005 	sltu	$1,$2,$24
1006 	dmultu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
1007 	daddu	$25,$1
1008 	daddu	$3,$25
1009 	sltu	$1,$3,$25
1010 	daddu	$7,$1
1011 	mflo	($24,$14,$17)
1012 	mfhi	($25,$14,$17)
1013 	daddu	$2,$24
1014 	sltu	$1,$2,$24
1015 	dmultu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
1016 	daddu	$25,$1
1017 	daddu	$3,$25
1018 	sltu	$1,$3,$25
1019 	daddu	$7,$1
1020 	mflo	($24,$13,$19)
1021 	mfhi	($25,$13,$19)
1022 	daddu	$2,$24
1023 	sltu	$1,$2,$24
1024 	dmultu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
1025 	daddu	$25,$1
1026 	daddu	$3,$25
1027 	sltu	$1,$3,$25
1028 	daddu	$7,$1
1029 	mflo	($24,$12,$21)
1030 	mfhi	($25,$12,$21)
1031 	daddu	$2,$24
1032 	sltu	$1,$2,$24
1033 	 dmultu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
1034 	daddu	$25,$1
1035 	daddu	$3,$25
1036 	sltu	$1,$3,$25
1037 	daddu	$7,$1
1038 	sd	$2,6*8($4)	# r[6]=c1;
1039 
1040 	mflo	($24,$12,$6)
1041 	mfhi	($25,$12,$6)
1042 	daddu	$3,$24
1043 	sltu	$1,$3,$24
1044 	dmultu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
1045 	daddu	$25,$1
1046 	daddu	$7,$25
1047 	sltu	$2,$7,$25
1048 	mflo	($24,$13,$21)
1049 	mfhi	($25,$13,$21)
1050 	daddu	$3,$24
1051 	sltu	$1,$3,$24
1052 	dmultu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
1053 	daddu	$25,$1
1054 	daddu	$7,$25
1055 	sltu	$1,$7,$25
1056 	daddu	$2,$1
1057 	mflo	($24,$14,$19)
1058 	mfhi	($25,$14,$19)
1059 	daddu	$3,$24
1060 	sltu	$1,$3,$24
1061 	dmultu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
1062 	daddu	$25,$1
1063 	daddu	$7,$25
1064 	sltu	$1,$7,$25
1065 	daddu	$2,$1
1066 	mflo	($24,$15,$17)
1067 	mfhi	($25,$15,$17)
1068 	daddu	$3,$24
1069 	sltu	$1,$3,$24
1070 	dmultu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
1071 	daddu	$25,$1
1072 	daddu	$7,$25
1073 	sltu	$1,$7,$25
1074 	daddu	$2,$1
1075 	mflo	($24,$16,$11)
1076 	mfhi	($25,$16,$11)
1077 	daddu	$3,$24
1078 	sltu	$1,$3,$24
1079 	dmultu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
1080 	daddu	$25,$1
1081 	daddu	$7,$25
1082 	sltu	$1,$7,$25
1083 	daddu	$2,$1
1084 	mflo	($24,$18,$10)
1085 	mfhi	($25,$18,$10)
1086 	daddu	$3,$24
1087 	sltu	$1,$3,$24
1088 	dmultu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
1089 	daddu	$25,$1
1090 	daddu	$7,$25
1091 	sltu	$1,$7,$25
1092 	daddu	$2,$1
1093 	mflo	($24,$20,$9)
1094 	mfhi	($25,$20,$9)
1095 	daddu	$3,$24
1096 	sltu	$1,$3,$24
1097 	dmultu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
1098 	daddu	$25,$1
1099 	daddu	$7,$25
1100 	sltu	$1,$7,$25
1101 	daddu	$2,$1
1102 	mflo	($24,$5,$8)
1103 	mfhi	($25,$5,$8)
1104 	daddu	$3,$24
1105 	sltu	$1,$3,$24
1106 	 dmultu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
1107 	daddu	$25,$1
1108 	daddu	$7,$25
1109 	sltu	$1,$7,$25
1110 	daddu	$2,$1
1111 	sd	$3,7*8($4)	# r[7]=c2;
1112 
1113 	mflo	($24,$5,$9)
1114 	mfhi	($25,$5,$9)
1115 	daddu	$7,$24
1116 	sltu	$1,$7,$24
1117 	dmultu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
1118 	daddu	$25,$1
1119 	daddu	$2,$25
1120 	sltu	$3,$2,$25
1121 	mflo	($24,$20,$10)
1122 	mfhi	($25,$20,$10)
1123 	daddu	$7,$24
1124 	sltu	$1,$7,$24
1125 	dmultu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
1126 	daddu	$25,$1
1127 	daddu	$2,$25
1128 	sltu	$1,$2,$25
1129 	daddu	$3,$1
1130 	mflo	($24,$18,$11)
1131 	mfhi	($25,$18,$11)
1132 	daddu	$7,$24
1133 	sltu	$1,$7,$24
1134 	dmultu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
1135 	daddu	$25,$1
1136 	daddu	$2,$25
1137 	sltu	$1,$2,$25
1138 	daddu	$3,$1
1139 	mflo	($24,$16,$17)
1140 	mfhi	($25,$16,$17)
1141 	daddu	$7,$24
1142 	sltu	$1,$7,$24
1143 	dmultu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
1144 	daddu	$25,$1
1145 	daddu	$2,$25
1146 	sltu	$1,$2,$25
1147 	daddu	$3,$1
1148 	mflo	($24,$15,$19)
1149 	mfhi	($25,$15,$19)
1150 	daddu	$7,$24
1151 	sltu	$1,$7,$24
1152 	dmultu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
1153 	daddu	$25,$1
1154 	daddu	$2,$25
1155 	sltu	$1,$2,$25
1156 	daddu	$3,$1
1157 	mflo	($24,$14,$21)
1158 	mfhi	($25,$14,$21)
1159 	daddu	$7,$24
1160 	sltu	$1,$7,$24
1161 	dmultu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
1162 	daddu	$25,$1
1163 	daddu	$2,$25
1164 	sltu	$1,$2,$25
1165 	daddu	$3,$1
1166 	mflo	($24,$13,$6)
1167 	mfhi	($25,$13,$6)
1168 	daddu	$7,$24
1169 	sltu	$1,$7,$24
1170 	 dmultu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
1171 	daddu	$25,$1
1172 	daddu	$2,$25
1173 	sltu	$1,$2,$25
1174 	daddu	$3,$1
1175 	sd	$7,8*8($4)	# r[8]=c3;
1176 
1177 	mflo	($24,$14,$6)
1178 	mfhi	($25,$14,$6)
1179 	daddu	$2,$24
1180 	sltu	$1,$2,$24
1181 	dmultu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
1182 	daddu	$25,$1
1183 	daddu	$3,$25
1184 	sltu	$7,$3,$25
1185 	mflo	($24,$15,$21)
1186 	mfhi	($25,$15,$21)
1187 	daddu	$2,$24
1188 	sltu	$1,$2,$24
1189 	dmultu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
1190 	daddu	$25,$1
1191 	daddu	$3,$25
1192 	sltu	$1,$3,$25
1193 	daddu	$7,$1
1194 	mflo	($24,$16,$19)
1195 	mfhi	($25,$16,$19)
1196 	daddu	$2,$24
1197 	sltu	$1,$2,$24
1198 	dmultu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
1199 	daddu	$25,$1
1200 	daddu	$3,$25
1201 	sltu	$1,$3,$25
1202 	daddu	$7,$1
1203 	mflo	($24,$18,$17)
1204 	mfhi	($25,$18,$17)
1205 	daddu	$2,$24
1206 	sltu	$1,$2,$24
1207 	dmultu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
1208 	daddu	$25,$1
1209 	daddu	$3,$25
1210 	sltu	$1,$3,$25
1211 	daddu	$7,$1
1212 	mflo	($24,$20,$11)
1213 	mfhi	($25,$20,$11)
1214 	daddu	$2,$24
1215 	sltu	$1,$2,$24
1216 	dmultu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
1217 	daddu	$25,$1
1218 	daddu	$3,$25
1219 	sltu	$1,$3,$25
1220 	daddu	$7,$1
1221 	mflo	($24,$5,$10)
1222 	mfhi	($25,$5,$10)
1223 	daddu	$2,$24
1224 	sltu	$1,$2,$24
1225 	 dmultu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
1226 	daddu	$25,$1
1227 	daddu	$3,$25
1228 	sltu	$1,$3,$25
1229 	daddu	$7,$1
1230 	sd	$2,9*8($4)	# r[9]=c1;
1231 
1232 	mflo	($24,$5,$11)
1233 	mfhi	($25,$5,$11)
1234 	daddu	$3,$24
1235 	sltu	$1,$3,$24
1236 	dmultu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
1237 	daddu	$25,$1
1238 	daddu	$7,$25
1239 	sltu	$2,$7,$25
1240 	mflo	($24,$20,$17)
1241 	mfhi	($25,$20,$17)
1242 	daddu	$3,$24
1243 	sltu	$1,$3,$24
1244 	dmultu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
1245 	daddu	$25,$1
1246 	daddu	$7,$25
1247 	sltu	$1,$7,$25
1248 	daddu	$2,$1
1249 	mflo	($24,$18,$19)
1250 	mfhi	($25,$18,$19)
1251 	daddu	$3,$24
1252 	sltu	$1,$3,$24
1253 	dmultu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
1254 	daddu	$25,$1
1255 	daddu	$7,$25
1256 	sltu	$1,$7,$25
1257 	daddu	$2,$1
1258 	mflo	($24,$16,$21)
1259 	mfhi	($25,$16,$21)
1260 	daddu	$3,$24
1261 	sltu	$1,$3,$24
1262 	dmultu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
1263 	daddu	$25,$1
1264 	daddu	$7,$25
1265 	sltu	$1,$7,$25
1266 	daddu	$2,$1
1267 	mflo	($24,$15,$6)
1268 	mfhi	($25,$15,$6)
1269 	daddu	$3,$24
1270 	sltu	$1,$3,$24
1271 	dmultu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
1272 	daddu	$25,$1
1273 	daddu	$7,$25
1274 	sltu	$1,$7,$25
1275 	daddu	$2,$1
1276 	sd	$3,10*8($4)	# r[10]=c2;
1277 
1278 	mflo	($24,$16,$6)
1279 	mfhi	($25,$16,$6)
1280 	daddu	$7,$24
1281 	sltu	$1,$7,$24
1282 	dmultu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
1283 	daddu	$25,$1
1284 	daddu	$2,$25
1285 	sltu	$3,$2,$25
1286 	mflo	($24,$18,$21)
1287 	mfhi	($25,$18,$21)
1288 	daddu	$7,$24
1289 	sltu	$1,$7,$24
1290 	dmultu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
1291 	daddu	$25,$1
1292 	daddu	$2,$25
1293 	sltu	$1,$2,$25
1294 	daddu	$3,$1
1295 	mflo	($24,$20,$19)
1296 	mfhi	($25,$20,$19)
1297 	daddu	$7,$24
1298 	sltu	$1,$7,$24
1299 	dmultu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
1300 	daddu	$25,$1
1301 	daddu	$2,$25
1302 	sltu	$1,$2,$25
1303 	daddu	$3,$1
1304 	mflo	($24,$5,$17)
1305 	mfhi	($25,$5,$17)
1306 	daddu	$7,$24
1307 	sltu	$1,$7,$24
1308 	 dmultu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
1309 	daddu	$25,$1
1310 	daddu	$2,$25
1311 	sltu	$1,$2,$25
1312 	daddu	$3,$1
1313 	sd	$7,11*8($4)	# r[11]=c3;
1314 
1315 	mflo	($24,$5,$19)
1316 	mfhi	($25,$5,$19)
1317 	daddu	$2,$24
1318 	sltu	$1,$2,$24
1319 	dmultu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
1320 	daddu	$25,$1
1321 	daddu	$3,$25
1322 	sltu	$7,$3,$25
1323 	mflo	($24,$20,$21)
1324 	mfhi	($25,$20,$21)
1325 	daddu	$2,$24
1326 	sltu	$1,$2,$24
1327 	dmultu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
1328 	daddu	$25,$1
1329 	daddu	$3,$25
1330 	sltu	$1,$3,$25
1331 	daddu	$7,$1
1332 	mflo	($24,$18,$6)
1333 	mfhi	($25,$18,$6)
1334 	daddu	$2,$24
1335 	sltu	$1,$2,$24
1336 	 dmultu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
1337 	daddu	$25,$1
1338 	daddu	$3,$25
1339 	sltu	$1,$3,$25
1340 	daddu	$7,$1
1341 	sd	$2,12*8($4)	# r[12]=c1;
1342 
1343 	mflo	($24,$20,$6)
1344 	mfhi	($25,$20,$6)
1345 	daddu	$3,$24
1346 	sltu	$1,$3,$24
1347 	dmultu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
1348 	daddu	$25,$1
1349 	daddu	$7,$25
1350 	sltu	$2,$7,$25
1351 	mflo	($24,$5,$21)
1352 	mfhi	($25,$5,$21)
1353 	daddu	$3,$24
1354 	sltu	$1,$3,$24
1355 	dmultu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
1356 	daddu	$25,$1
1357 	daddu	$7,$25
1358 	sltu	$1,$7,$25
1359 	daddu	$2,$1
1360 	sd	$3,13*8($4)	# r[13]=c2;
1361 
1362 	mflo	($24,$5,$6)
1363 	mfhi	($25,$5,$6)
1364 	daddu	$7,$24
1365 	sltu	$1,$7,$24
1366 	daddu	$25,$1
1367 	daddu	$2,$25
1368 	sd	$7,14*8($4)	# r[14]=c3;
1369 	sd	$2,15*8($4)	# r[15]=c1;
1370 
/* Restore $16-$21; the stack pointer is released in the jr delay slot. */
1371 	.set	noreorder
1372 	ld	$21,5*8($29)
1373 	ld	$20,4*8($29)
1374 	ld	$19,3*8($29)
1375 	ld	$18,2*8($29)
1376 	ld	$17,1*8($29)
1377 	ld	$16,0*8($29)
1378 	jr	$31
1379 	daddu $29,6*8
1380 .end	bn_mul_comba8
1381 
/*
 * bn_mul_comba4: 4-word by 4-word multiplication producing 8 result
 * words, r[0..7] = a[0..3] * b[0..3], computed one result column at a
 * time (see the mul_add_c annotations on each multiply).
 * In:  $4 = r, $5 = a, $6 = b.
 * Register use:
 *   a[0..3] = $12-$15, b[0..3] = $8-$11;
 *   $2, $3, $7 rotate through the accumulator roles c1, c2, c3;
 *   $24/$25 = low/high half of the current product; $1 = carry scratch.
 * No stack frame is used and only the registers listed above are
 * written.  Result words are stored to offsets 0 through 7*8 of $4 in
 * ascending order as each column completes.
 */
1382 .align	5
1383 .globl	bn_mul_comba4
1384 .ent	bn_mul_comba4
1385 bn_mul_comba4:
1386 	.set	reorder
1387 	ld	$12,0($5)
1388 	ld	$8,0($6)
1389 	ld	$13,8($5)
1390 	ld	$14,2*8($5)
1391 	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
1392 	ld	$15,3*8($5)
1393 	ld	$9,8($6)
1394 	ld	$10,2*8($6)
1395 	ld	$11,3*8($6)
1396 	mflo	($2,$12,$8)
1397 	mfhi	($3,$12,$8)
1398 	sd	$2,0($4)
1399 
1400 	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
1401 	mflo	($24,$12,$9)
1402 	mfhi	($25,$12,$9)
1403 	daddu	$3,$24
1404 	sltu	$1,$3,$24
1405 	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
1406 	daddu	$7,$25,$1
1407 	mflo	($24,$13,$8)
1408 	mfhi	($25,$13,$8)
1409 	daddu	$3,$24
1410 	sltu	$1,$3,$24
1411 	 dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
1412 	daddu	$25,$1
1413 	daddu	$7,$25
1414 	sltu	$2,$7,$25
1415 	sd	$3,8($4)
1416 
1417 	mflo	($24,$14,$8)
1418 	mfhi	($25,$14,$8)
1419 	daddu	$7,$24
1420 	sltu	$1,$7,$24
1421 	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
1422 	daddu	$25,$1
1423 	daddu	$2,$25
1424 	mflo	($24,$13,$9)
1425 	mfhi	($25,$13,$9)
1426 	daddu	$7,$24
1427 	sltu	$1,$7,$24
1428 	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
1429 	daddu	$25,$1
1430 	daddu	$2,$25
1431 	sltu	$3,$2,$25
1432 	mflo	($24,$12,$10)
1433 	mfhi	($25,$12,$10)
1434 	daddu	$7,$24
1435 	sltu	$1,$7,$24
1436 	 dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
1437 	daddu	$25,$1
1438 	daddu	$2,$25
1439 	sltu	$1,$2,$25
1440 	daddu	$3,$1
1441 	sd	$7,2*8($4)
1442 
1443 	mflo	($24,$12,$11)
1444 	mfhi	($25,$12,$11)
1445 	daddu	$2,$24
1446 	sltu	$1,$2,$24
1447 	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
1448 	daddu	$25,$1
1449 	daddu	$3,$25
1450 	sltu	$7,$3,$25
1451 	mflo	($24,$13,$10)
1452 	mfhi	($25,$13,$10)
1453 	daddu	$2,$24
1454 	sltu	$1,$2,$24
1455 	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
1456 	daddu	$25,$1
1457 	daddu	$3,$25
1458 	sltu	$1,$3,$25
1459 	daddu	$7,$1
1460 	mflo	($24,$14,$9)
1461 	mfhi	($25,$14,$9)
1462 	daddu	$2,$24
1463 	sltu	$1,$2,$24
1464 	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
1465 	daddu	$25,$1
1466 	daddu	$3,$25
1467 	sltu	$1,$3,$25
1468 	daddu	$7,$1
1469 	mflo	($24,$15,$8)
1470 	mfhi	($25,$15,$8)
1471 	daddu	$2,$24
1472 	sltu	$1,$2,$24
1473 	 dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
1474 	daddu	$25,$1
1475 	daddu	$3,$25
1476 	sltu	$1,$3,$25
1477 	daddu	$7,$1
1478 	sd	$2,3*8($4)
1479 
1480 	mflo	($24,$15,$9)
1481 	mfhi	($25,$15,$9)
1482 	daddu	$3,$24
1483 	sltu	$1,$3,$24
1484 	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
1485 	daddu	$25,$1
1486 	daddu	$7,$25
1487 	sltu	$2,$7,$25
1488 	mflo	($24,$14,$10)
1489 	mfhi	($25,$14,$10)
1490 	daddu	$3,$24
1491 	sltu	$1,$3,$24
1492 	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
1493 	daddu	$25,$1
1494 	daddu	$7,$25
1495 	sltu	$1,$7,$25
1496 	daddu	$2,$1
1497 	mflo	($24,$13,$11)
1498 	mfhi	($25,$13,$11)
1499 	daddu	$3,$24
1500 	sltu	$1,$3,$24
1501 	 dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
1502 	daddu	$25,$1
1503 	daddu	$7,$25
1504 	sltu	$1,$7,$25
1505 	daddu	$2,$1
1506 	sd	$3,4*8($4)
1507 
1508 	mflo	($24,$14,$11)
1509 	mfhi	($25,$14,$11)
1510 	daddu	$7,$24
1511 	sltu	$1,$7,$24
1512 	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
1513 	daddu	$25,$1
1514 	daddu	$2,$25
1515 	sltu	$3,$2,$25
1516 	mflo	($24,$15,$10)
1517 	mfhi	($25,$15,$10)
1518 	daddu	$7,$24
1519 	sltu	$1,$7,$24
1520 	 dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
1521 	daddu	$25,$1
1522 	daddu	$2,$25
1523 	sltu	$1,$2,$25
1524 	daddu	$3,$1
1525 	sd	$7,5*8($4)
1526 
1527 	mflo	($24,$15,$11)
1528 	mfhi	($25,$15,$11)
1529 	daddu	$2,$24
1530 	sltu	$1,$2,$24
1531 	daddu	$25,$1
1532 	daddu	$3,$25
1533 	sd	$2,6*8($4)
1534 	sd	$3,7*8($4)
1535 
1536 	.set	noreorder
1537 	jr	$31
1538 	nop
1539 .end	bn_mul_comba4
1540 
# bn_sqr_comba8: column-wise (comba) square of an 8-word operand.
#
# Register usage visible in this routine:
#   $4        destination r; sixteen doublewords are stored at 0..15*8($4)
#   $5        operand a; $12..$15 hold a[0]..a[3], $8..$11 hold a[4]..a[7]
#   $6        scratch only; written once while doubling the first cross
#             product and never read as an input
#   $24,$25   low and high half of the current product
#   $2,$3,$7  rotating three-word column accumulator (c1,c2,c3)
#   $1        carry scratch (the file is assembled with .set noat)
#
# Cross products a[i]*a[j] with i != j are counted twice, diagonal
# products a[i]*a[i] once. Only $5 is ever loaded from, so the b[...]
# names in the inherited mul_add_c and mul_add_c2 comments denote the
# same operand a.
#
# Doubling is done in two ways:
#   - the very first cross product is doubled by shifting both halves
#     left by one; the bits shifted out are recovered beforehand with
#     slt against $0 (true exactly when bit 63 is set);
#   - every later cross product is added twice: the low half is added to
#     the lowest accumulator word two times with a carry taken after each
#     add, and the high half is added to the middle word two times, once
#     together with each of those carries. This is the 12 or 13
#     instruction sequence wrapped around each multiply commented as
#     forward multiplication. In the shorter (12 instruction) variant the
#     top accumulator word is assigned its first carry, which restarts it
#     for the new column; in the longer variant it is only added to.
#
# A forward multiplication is issued in the middle of accumulating the
# previous product, and its result is read by the lo/hi pair that follows.
#
# NOTE(review): the multiply and lo/hi reads are written with parenthesised
# operand lists, so they are presumably preprocessor macros supplied by
# mips_arch.h (not visible here); confirm before changing any operand list.
#
# No stack access is made; the routine returns with jr $31.
1541 .align	5
1542 .globl	bn_sqr_comba8
1543 .ent	bn_sqr_comba8
1544 bn_sqr_comba8:
1545 	.set	reorder
1546 	ld	$12,0($5)	# a[0]
1547 	ld	$13,8($5)	# a[1]
1548 	ld	$14,2*8($5)	# a[2]
1549 	ld	$15,3*8($5)	# a[3]
1550 
1551 	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
1552 	ld	$8,4*8($5)	# a[4]
1553 	ld	$9,5*8($5)	# a[5]
1554 	ld	$10,6*8($5)	# a[6]
1555 	ld	$11,7*8($5)	# a[7]
1556 	mflo	($2,$12,$12)	# c1 = lo
1557 	mfhi	($3,$12,$12)	# c2 = hi
1558 	sd	$2,0($4)	# r[0]=c1;
1559 
1560 	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
1561 	mflo	($24,$12,$13)
1562 	mfhi	($25,$12,$13)
1563 	slt	$2,$25,$0	# c1 = bit 63 of hi, the bit lost by doubling hi
1564 	dsll	$25,1		# hi *= 2
1565 	 dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
1566 	slt	$6,$24,$0	# $6 = bit 63 of lo
1567 	daddu	$25,$6		# move that bit into the doubled hi
1568 	dsll	$24,1		# lo *= 2
1569 	daddu	$3,$24		# c2 += lo
1570 	sltu	$1,$3,$24	# carry out of c2 (sum < addend)
1571 	daddu	$7,$25,$1	# c3 = hi + carry (first use of c3)
1572 	sd	$3,8($4)	# r[1]=c2;
1573 	sltu	$1,$7,$25	# carry out of c3
1574 	daddu	$2,$1		# c1 += carry
1575 	mflo	($24,$14,$12)
1576 	mfhi	($25,$14,$12)
1577 	daddu	$7,$24		# c3 += lo (first time)
1578 	sltu	$1,$7,$24	# $1 = first carry
1579 	 dmultu	($13,$13)		# forward multiplication: a[1]*a[1]
1580 	daddu	$7,$24		# c3 += lo (second time)
1581 	daddu	$1,$25		# $1 = hi + first carry
1582 	sltu	$24,$7,$24	# $24 now holds the second carry
1583 	daddu	$2,$1		# c1 += hi + first carry
1584 	daddu	$25,$24		# hi += second carry
1585 	sltu	$3,$2,$1	# c2 = carry out of c1 (c2 restarts)
1586 	daddu	$2,$25		# c1 += hi + second carry
1587 	sltu	$25,$2,$25	# carry out of c1
1588 	daddu	$3,$25		# c2 += carry
1589 	mflo	($24,$13,$13)
1590 	mfhi	($25,$13,$13)
1591 	daddu	$7,$24		# diagonal term, added once: c3 += lo
1592 	sltu	$1,$7,$24	# carry out of c3
1593 	 dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
1594 	daddu	$25,$1		# hi += carry
1595 	daddu	$2,$25		# c1 += hi
1596 	sltu	$1,$2,$25	# carry out of c1
1597 	daddu	$3,$1		# c2 += carry
1598 	sd	$7,2*8($4)	# r[2]=c3;
1599 	mflo	($24,$12,$15)
1600 	mfhi	($25,$12,$15)
1601 	daddu	$2,$24
1602 	sltu	$1,$2,$24
1603 	 dmultu	($13,$14)		# forward multiplication: a[1]*a[2]
1604 	daddu	$2,$24
1605 	daddu	$1,$25
1606 	sltu	$24,$2,$24
1607 	daddu	$3,$1
1608 	daddu	$25,$24
1609 	sltu	$7,$3,$1	# c3 = carry out of c2 (c3 restarts)
1610 	daddu	$3,$25
1611 	sltu	$25,$3,$25
1612 	daddu	$7,$25
1613 	mflo	($24,$13,$14)
1614 	mfhi	($25,$13,$14)
1615 	daddu	$2,$24
1616 	sltu	$1,$2,$24
1617 	 dmultu	($8,$12)		# forward multiplication: a[4]*a[0]
1618 	daddu	$2,$24
1619 	daddu	$1,$25
1620 	sltu	$24,$2,$24
1621 	daddu	$3,$1
1622 	daddu	$25,$24
1623 	sltu	$1,$3,$1	# carry out of c2 goes to $1 this time ...
1624 	daddu	$3,$25
1625 	daddu	$7,$1		# ... and is added, since c3 is already in use
1626 	sltu	$25,$3,$25
1627 	daddu	$7,$25
1628 	mflo	($24,$8,$12)
1629 	mfhi	($25,$8,$12)
1630 	sd	$2,3*8($4)	# r[3]=c1;
1631 	daddu	$3,$24
1632 	sltu	$1,$3,$24
1633 	 dmultu	($15,$13)		# forward multiplication: a[3]*a[1]
1634 	daddu	$3,$24
1635 	daddu	$1,$25
1636 	sltu	$24,$3,$24
1637 	daddu	$7,$1
1638 	daddu	$25,$24
1639 	sltu	$2,$7,$1	# c1 = carry out of c3 (c1 restarts)
1640 	daddu	$7,$25
1641 	sltu	$25,$7,$25
1642 	daddu	$2,$25
1643 	mflo	($24,$15,$13)
1644 	mfhi	($25,$15,$13)
1645 	daddu	$3,$24
1646 	sltu	$1,$3,$24
1647 	 dmultu	($14,$14)		# forward multiplication: a[2]*a[2]
1648 	daddu	$3,$24
1649 	daddu	$1,$25
1650 	sltu	$24,$3,$24
1651 	daddu	$7,$1
1652 	daddu	$25,$24
1653 	sltu	$1,$7,$1
1654 	daddu	$7,$25
1655 	daddu	$2,$1
1656 	sltu	$25,$7,$25
1657 	daddu	$2,$25
1658 	mflo	($24,$14,$14)
1659 	mfhi	($25,$14,$14)
1660 	daddu	$3,$24
1661 	sltu	$1,$3,$24
1662 	 dmultu	($12,$9)		# mul_add_c2(a[0],b[5],c3,c1,c2);
1663 	daddu	$25,$1
1664 	daddu	$7,$25
1665 	sltu	$1,$7,$25
1666 	daddu	$2,$1
1667 	sd	$3,4*8($4)	# r[4]=c2;
1668 	mflo	($24,$12,$9)
1669 	mfhi	($25,$12,$9)
1670 	daddu	$7,$24
1671 	sltu	$1,$7,$24
1672 	 dmultu	($13,$8)		# forward multiplication: a[1]*a[4]
1673 	daddu	$7,$24
1674 	daddu	$1,$25
1675 	sltu	$24,$7,$24
1676 	daddu	$2,$1
1677 	daddu	$25,$24
1678 	sltu	$3,$2,$1	# c2 = carry out of c1 (c2 restarts)
1679 	daddu	$2,$25
1680 	sltu	$25,$2,$25
1681 	daddu	$3,$25
1682 	mflo	($24,$13,$8)
1683 	mfhi	($25,$13,$8)
1684 	daddu	$7,$24
1685 	sltu	$1,$7,$24
1686 	 dmultu	($14,$15)		# forward multiplication: a[2]*a[3]
1687 	daddu	$7,$24
1688 	daddu	$1,$25
1689 	sltu	$24,$7,$24
1690 	daddu	$2,$1
1691 	daddu	$25,$24
1692 	sltu	$1,$2,$1
1693 	daddu	$2,$25
1694 	daddu	$3,$1
1695 	sltu	$25,$2,$25
1696 	daddu	$3,$25
1697 	mflo	($24,$14,$15)
1698 	mfhi	($25,$14,$15)
1699 	daddu	$7,$24
1700 	sltu	$1,$7,$24
1701 	 dmultu	($10,$12)		# forward multiplication: a[6]*a[0]
1702 	daddu	$7,$24
1703 	daddu	$1,$25
1704 	sltu	$24,$7,$24
1705 	daddu	$2,$1
1706 	daddu	$25,$24
1707 	sltu	$1,$2,$1
1708 	daddu	$2,$25
1709 	daddu	$3,$1
1710 	sltu	$25,$2,$25
1711 	daddu	$3,$25
1712 	mflo	($24,$10,$12)
1713 	mfhi	($25,$10,$12)
1714 	sd	$7,5*8($4)	# r[5]=c3;
1715 	daddu	$2,$24
1716 	sltu	$1,$2,$24
1717 	 dmultu	($9,$13)		# forward multiplication: a[5]*a[1]
1718 	daddu	$2,$24
1719 	daddu	$1,$25
1720 	sltu	$24,$2,$24
1721 	daddu	$3,$1
1722 	daddu	$25,$24
1723 	sltu	$7,$3,$1	# c3 = carry out of c2 (c3 restarts)
1724 	daddu	$3,$25
1725 	sltu	$25,$3,$25
1726 	daddu	$7,$25
1727 	mflo	($24,$9,$13)
1728 	mfhi	($25,$9,$13)
1729 	daddu	$2,$24
1730 	sltu	$1,$2,$24
1731 	 dmultu	($8,$14)		# forward multiplication: a[4]*a[2]
1732 	daddu	$2,$24
1733 	daddu	$1,$25
1734 	sltu	$24,$2,$24
1735 	daddu	$3,$1
1736 	daddu	$25,$24
1737 	sltu	$1,$3,$1
1738 	daddu	$3,$25
1739 	daddu	$7,$1
1740 	sltu	$25,$3,$25
1741 	daddu	$7,$25
1742 	mflo	($24,$8,$14)
1743 	mfhi	($25,$8,$14)
1744 	daddu	$2,$24
1745 	sltu	$1,$2,$24
1746 	 dmultu	($15,$15)		# forward multiplication: a[3]*a[3]
1747 	daddu	$2,$24
1748 	daddu	$1,$25
1749 	sltu	$24,$2,$24
1750 	daddu	$3,$1
1751 	daddu	$25,$24
1752 	sltu	$1,$3,$1
1753 	daddu	$3,$25
1754 	daddu	$7,$1
1755 	sltu	$25,$3,$25
1756 	daddu	$7,$25
1757 	mflo	($24,$15,$15)
1758 	mfhi	($25,$15,$15)
1759 	daddu	$2,$24
1760 	sltu	$1,$2,$24
1761 	 dmultu	($12,$11)		# mul_add_c2(a[0],b[7],c2,c3,c1);
1762 	daddu	$25,$1
1763 	daddu	$3,$25
1764 	sltu	$1,$3,$25
1765 	daddu	$7,$1
1766 	sd	$2,6*8($4)	# r[6]=c1;
1767 	mflo	($24,$12,$11)
1768 	mfhi	($25,$12,$11)
1769 	daddu	$3,$24
1770 	sltu	$1,$3,$24
1771 	 dmultu	($13,$10)		# forward multiplication: a[1]*a[6]
1772 	daddu	$3,$24
1773 	daddu	$1,$25
1774 	sltu	$24,$3,$24
1775 	daddu	$7,$1
1776 	daddu	$25,$24
1777 	sltu	$2,$7,$1	# c1 = carry out of c3 (c1 restarts)
1778 	daddu	$7,$25
1779 	sltu	$25,$7,$25
1780 	daddu	$2,$25
1781 	mflo	($24,$13,$10)
1782 	mfhi	($25,$13,$10)
1783 	daddu	$3,$24
1784 	sltu	$1,$3,$24
1785 	 dmultu	($14,$9)		# forward multiplication: a[2]*a[5]
1786 	daddu	$3,$24
1787 	daddu	$1,$25
1788 	sltu	$24,$3,$24
1789 	daddu	$7,$1
1790 	daddu	$25,$24
1791 	sltu	$1,$7,$1
1792 	daddu	$7,$25
1793 	daddu	$2,$1
1794 	sltu	$25,$7,$25
1795 	daddu	$2,$25
1796 	mflo	($24,$14,$9)
1797 	mfhi	($25,$14,$9)
1798 	daddu	$3,$24
1799 	sltu	$1,$3,$24
1800 	 dmultu	($15,$8)		# forward multiplication: a[3]*a[4]
1801 	daddu	$3,$24
1802 	daddu	$1,$25
1803 	sltu	$24,$3,$24
1804 	daddu	$7,$1
1805 	daddu	$25,$24
1806 	sltu	$1,$7,$1
1807 	daddu	$7,$25
1808 	daddu	$2,$1
1809 	sltu	$25,$7,$25
1810 	daddu	$2,$25
1811 	mflo	($24,$15,$8)
1812 	mfhi	($25,$15,$8)
1813 	daddu	$3,$24
1814 	sltu	$1,$3,$24
1815 	 dmultu	($11,$13)		# forward multiplication: a[7]*a[1]
1816 	daddu	$3,$24
1817 	daddu	$1,$25
1818 	sltu	$24,$3,$24
1819 	daddu	$7,$1
1820 	daddu	$25,$24
1821 	sltu	$1,$7,$1
1822 	daddu	$7,$25
1823 	daddu	$2,$1
1824 	sltu	$25,$7,$25
1825 	daddu	$2,$25
1826 	mflo	($24,$11,$13)
1827 	mfhi	($25,$11,$13)
1828 	sd	$3,7*8($4)	# r[7]=c2;
1829 	daddu	$7,$24
1830 	sltu	$1,$7,$24
1831 	 dmultu	($10,$14)		# forward multiplication: a[6]*a[2]
1832 	daddu	$7,$24
1833 	daddu	$1,$25
1834 	sltu	$24,$7,$24
1835 	daddu	$2,$1
1836 	daddu	$25,$24
1837 	sltu	$3,$2,$1	# c2 = carry out of c1 (c2 restarts)
1838 	daddu	$2,$25
1839 	sltu	$25,$2,$25
1840 	daddu	$3,$25
1841 	mflo	($24,$10,$14)
1842 	mfhi	($25,$10,$14)
1843 	daddu	$7,$24
1844 	sltu	$1,$7,$24
1845 	 dmultu	($9,$15)		# forward multiplication: a[5]*a[3]
1846 	daddu	$7,$24
1847 	daddu	$1,$25
1848 	sltu	$24,$7,$24
1849 	daddu	$2,$1
1850 	daddu	$25,$24
1851 	sltu	$1,$2,$1
1852 	daddu	$2,$25
1853 	daddu	$3,$1
1854 	sltu	$25,$2,$25
1855 	daddu	$3,$25
1856 	mflo	($24,$9,$15)
1857 	mfhi	($25,$9,$15)
1858 	daddu	$7,$24
1859 	sltu	$1,$7,$24
1860 	 dmultu	($8,$8)		# forward multiplication: a[4]*a[4]
1861 	daddu	$7,$24
1862 	daddu	$1,$25
1863 	sltu	$24,$7,$24
1864 	daddu	$2,$1
1865 	daddu	$25,$24
1866 	sltu	$1,$2,$1
1867 	daddu	$2,$25
1868 	daddu	$3,$1
1869 	sltu	$25,$2,$25
1870 	daddu	$3,$25
1871 	mflo	($24,$8,$8)
1872 	mfhi	($25,$8,$8)
1873 	daddu	$7,$24
1874 	sltu	$1,$7,$24
1875 	 dmultu	($14,$11)		# mul_add_c2(a[2],b[7],c1,c2,c3);
1876 	daddu	$25,$1
1877 	daddu	$2,$25
1878 	sltu	$1,$2,$25
1879 	daddu	$3,$1
1880 	sd	$7,8*8($4)	# r[8]=c3;
1881 	mflo	($24,$14,$11)
1882 	mfhi	($25,$14,$11)
1883 	daddu	$2,$24
1884 	sltu	$1,$2,$24
1885 	 dmultu	($15,$10)		# forward multiplication: a[3]*a[6]
1886 	daddu	$2,$24
1887 	daddu	$1,$25
1888 	sltu	$24,$2,$24
1889 	daddu	$3,$1
1890 	daddu	$25,$24
1891 	sltu	$7,$3,$1	# c3 = carry out of c2 (c3 restarts)
1892 	daddu	$3,$25
1893 	sltu	$25,$3,$25
1894 	daddu	$7,$25
1895 	mflo	($24,$15,$10)
1896 	mfhi	($25,$15,$10)
1897 	daddu	$2,$24
1898 	sltu	$1,$2,$24
1899 	 dmultu	($8,$9)		# forward multiplication: a[4]*a[5]
1900 	daddu	$2,$24
1901 	daddu	$1,$25
1902 	sltu	$24,$2,$24
1903 	daddu	$3,$1
1904 	daddu	$25,$24
1905 	sltu	$1,$3,$1
1906 	daddu	$3,$25
1907 	daddu	$7,$1
1908 	sltu	$25,$3,$25
1909 	daddu	$7,$25
1910 	mflo	($24,$8,$9)
1911 	mfhi	($25,$8,$9)
1912 	daddu	$2,$24
1913 	sltu	$1,$2,$24
1914 	 dmultu	($11,$15)		# forward multiplication: a[7]*a[3]
1915 	daddu	$2,$24
1916 	daddu	$1,$25
1917 	sltu	$24,$2,$24
1918 	daddu	$3,$1
1919 	daddu	$25,$24
1920 	sltu	$1,$3,$1
1921 	daddu	$3,$25
1922 	daddu	$7,$1
1923 	sltu	$25,$3,$25
1924 	daddu	$7,$25
1925 	mflo	($24,$11,$15)
1926 	mfhi	($25,$11,$15)
1927 	sd	$2,9*8($4)	# r[9]=c1;
1928 	daddu	$3,$24
1929 	sltu	$1,$3,$24
1930 	 dmultu	($10,$8)		# forward multiplication: a[6]*a[4]
1931 	daddu	$3,$24
1932 	daddu	$1,$25
1933 	sltu	$24,$3,$24
1934 	daddu	$7,$1
1935 	daddu	$25,$24
1936 	sltu	$2,$7,$1	# c1 = carry out of c3 (c1 restarts)
1937 	daddu	$7,$25
1938 	sltu	$25,$7,$25
1939 	daddu	$2,$25
1940 	mflo	($24,$10,$8)
1941 	mfhi	($25,$10,$8)
1942 	daddu	$3,$24
1943 	sltu	$1,$3,$24
1944 	 dmultu	($9,$9)		# forward multiplication: a[5]*a[5]
1945 	daddu	$3,$24
1946 	daddu	$1,$25
1947 	sltu	$24,$3,$24
1948 	daddu	$7,$1
1949 	daddu	$25,$24
1950 	sltu	$1,$7,$1
1951 	daddu	$7,$25
1952 	daddu	$2,$1
1953 	sltu	$25,$7,$25
1954 	daddu	$2,$25
1955 	mflo	($24,$9,$9)
1956 	mfhi	($25,$9,$9)
1957 	daddu	$3,$24
1958 	sltu	$1,$3,$24
1959 	 dmultu	($8,$11)		# mul_add_c2(a[4],b[7],c3,c1,c2);
1960 	daddu	$25,$1
1961 	daddu	$7,$25
1962 	sltu	$1,$7,$25
1963 	daddu	$2,$1
1964 	sd	$3,10*8($4)	# r[10]=c2;
1965 	mflo	($24,$8,$11)
1966 	mfhi	($25,$8,$11)
1967 	daddu	$7,$24
1968 	sltu	$1,$7,$24
1969 	 dmultu	($9,$10)		# forward multiplication: a[5]*a[6]
1970 	daddu	$7,$24
1971 	daddu	$1,$25
1972 	sltu	$24,$7,$24
1973 	daddu	$2,$1
1974 	daddu	$25,$24
1975 	sltu	$3,$2,$1	# c2 = carry out of c1 (c2 restarts)
1976 	daddu	$2,$25
1977 	sltu	$25,$2,$25
1978 	daddu	$3,$25
1979 	mflo	($24,$9,$10)
1980 	mfhi	($25,$9,$10)
1981 	daddu	$7,$24
1982 	sltu	$1,$7,$24
1983 	 dmultu	($11,$9)		# forward multiplication: a[7]*a[5]
1984 	daddu	$7,$24
1985 	daddu	$1,$25
1986 	sltu	$24,$7,$24
1987 	daddu	$2,$1
1988 	daddu	$25,$24
1989 	sltu	$1,$2,$1
1990 	daddu	$2,$25
1991 	daddu	$3,$1
1992 	sltu	$25,$2,$25
1993 	daddu	$3,$25
1994 	mflo	($24,$11,$9)
1995 	mfhi	($25,$11,$9)
1996 	sd	$7,11*8($4)	# r[11]=c3;
1997 	daddu	$2,$24
1998 	sltu	$1,$2,$24
1999 	 dmultu	($10,$10)		# forward multiplication: a[6]*a[6]
2000 	daddu	$2,$24
2001 	daddu	$1,$25
2002 	sltu	$24,$2,$24
2003 	daddu	$3,$1
2004 	daddu	$25,$24
2005 	sltu	$7,$3,$1	# c3 = carry out of c2 (c3 restarts)
2006 	daddu	$3,$25
2007 	sltu	$25,$3,$25
2008 	daddu	$7,$25
2009 	mflo	($24,$10,$10)
2010 	mfhi	($25,$10,$10)
2011 	daddu	$2,$24
2012 	sltu	$1,$2,$24
2013 	 dmultu	($10,$11)		# mul_add_c2(a[6],b[7],c2,c3,c1);
2014 	daddu	$25,$1
2015 	daddu	$3,$25
2016 	sltu	$1,$3,$25
2017 	daddu	$7,$1
2018 	sd	$2,12*8($4)	# r[12]=c1;
2019 	mflo	($24,$10,$11)
2020 	mfhi	($25,$10,$11)
2021 	daddu	$3,$24
2022 	sltu	$1,$3,$24
2023 	 dmultu	($11,$11)		# forward multiplication: a[7]*a[7]
2024 	daddu	$3,$24
2025 	daddu	$1,$25
2026 	sltu	$24,$3,$24
2027 	daddu	$7,$1
2028 	daddu	$25,$24
2029 	sltu	$2,$7,$1	# c1 = carry out of c3 (c1 restarts)
2030 	daddu	$7,$25
2031 	sltu	$25,$7,$25
2032 	daddu	$2,$25
2033 	mflo	($24,$11,$11)
2034 	mfhi	($25,$11,$11)
2035 	sd	$3,13*8($4)	# r[13]=c2;
2036 
2037 	daddu	$7,$24		# c3 += lo
2038 	sltu	$1,$7,$24	# carry out of c3
2039 	daddu	$25,$1		# hi += carry
2040 	daddu	$2,$25		# c1 += hi; last word, no further carry is tracked
2041 	sd	$7,14*8($4)	# r[14]=c3;
2042 	sd	$2,15*8($4)	# r[15]=c1;
2043 
2044 	.set	noreorder
2045 	jr	$31		# return
2046 	nop			# branch delay slot, written by hand under noreorder
2047 .end	bn_sqr_comba8
2048 
# bn_sqr_comba4: column-wise (comba) square of a 4-word operand.
#
# Register usage visible in this routine:
#   $4        destination r; eight doublewords are stored at 0..7*8($4)
#   $5        operand a; loaded into $12..$15 as a[0]..a[3]
#   $6        scratch only; written once while doubling the first cross
#             product and never read as an input
#   $24,$25   low and high half of the current product
#   $2,$3,$7  rotating three-word column accumulator (c1,c2,c3)
#   $1        carry scratch (the file is assembled with .set noat)
#
# Cross products a[i]*a[j] with i != j are counted twice, diagonal
# products a[i]*a[i] once. Only $5 is ever loaded from, so the b[...]
# names in the inherited mul_add_c and mul_add_c2 comments denote the
# same operand a.
#
# The first cross product is doubled by shifting both halves left by one,
# after saving the bits that fall out with slt against $0. Every later
# cross product is added twice instead: low half twice into the lowest
# accumulator word, high half twice into the middle word together with the
# two resulting carries. Those are the sequences wrapped around each
# multiply commented as forward multiplication, whose own result is read
# by the lo/hi pair that follows the sequence.
#
# NOTE(review): the multiply and lo/hi reads are written with parenthesised
# operand lists, so they are presumably preprocessor macros supplied by
# mips_arch.h (not visible here); confirm before changing any operand list.
#
# No stack access is made; the routine returns with jr $31.
2049 .align	5
2050 .globl	bn_sqr_comba4
2051 .ent	bn_sqr_comba4
2052 bn_sqr_comba4:
2053 	.set	reorder
2054 	ld	$12,0($5)	# a[0]
2055 	ld	$13,8($5)	# a[1]
2056 	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
2057 	ld	$14,2*8($5)	# a[2]
2058 	ld	$15,3*8($5)	# a[3]
2059 	mflo	($2,$12,$12)	# c1 = lo
2060 	mfhi	($3,$12,$12)	# c2 = hi
2061 	sd	$2,0($4)	# r[0]=c1;
2062 
2063 	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
2064 	mflo	($24,$12,$13)
2065 	mfhi	($25,$12,$13)
2066 	slt	$2,$25,$0	# c1 = bit 63 of hi, the bit lost by doubling hi
2067 	dsll	$25,1		# hi *= 2
2068 	 dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
2069 	slt	$6,$24,$0	# $6 = bit 63 of lo
2070 	daddu	$25,$6		# move that bit into the doubled hi
2071 	dsll	$24,1		# lo *= 2
2072 	daddu	$3,$24		# c2 += lo
2073 	sltu	$1,$3,$24	# carry out of c2 (sum < addend)
2074 	daddu	$7,$25,$1	# c3 = hi + carry (first use of c3)
2075 	sd	$3,8($4)	# r[1]=c2;
2076 	sltu	$1,$7,$25	# carry out of c3
2077 	daddu	$2,$1		# c1 += carry
2078 	mflo	($24,$14,$12)
2079 	mfhi	($25,$14,$12)
2080 	daddu	$7,$24		# c3 += lo (first time)
2081 	sltu	$1,$7,$24	# $1 = first carry
2082 	 dmultu	($13,$13)		# forward multiplication: a[1]*a[1]
2083 	daddu	$7,$24		# c3 += lo (second time)
2084 	daddu	$1,$25		# $1 = hi + first carry
2085 	sltu	$24,$7,$24	# $24 now holds the second carry
2086 	daddu	$2,$1		# c1 += hi + first carry
2087 	daddu	$25,$24		# hi += second carry
2088 	sltu	$3,$2,$1	# c2 = carry out of c1 (c2 restarts)
2089 	daddu	$2,$25		# c1 += hi + second carry
2090 	sltu	$25,$2,$25	# carry out of c1
2091 	daddu	$3,$25		# c2 += carry
2092 	mflo	($24,$13,$13)
2093 	mfhi	($25,$13,$13)
2094 	daddu	$7,$24		# diagonal term, added once: c3 += lo
2095 	sltu	$1,$7,$24	# carry out of c3
2096 	 dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
2097 	daddu	$25,$1		# hi += carry
2098 	daddu	$2,$25		# c1 += hi
2099 	sltu	$1,$2,$25	# carry out of c1
2100 	daddu	$3,$1		# c2 += carry
2101 	sd	$7,2*8($4)	# r[2]=c3;
2102 	mflo	($24,$12,$15)
2103 	mfhi	($25,$12,$15)
2104 	daddu	$2,$24
2105 	sltu	$1,$2,$24
2106 	 dmultu	($13,$14)		# forward multiplication: a[1]*a[2]
2107 	daddu	$2,$24
2108 	daddu	$1,$25
2109 	sltu	$24,$2,$24
2110 	daddu	$3,$1
2111 	daddu	$25,$24
2112 	sltu	$7,$3,$1	# c3 = carry out of c2 (c3 restarts)
2113 	daddu	$3,$25
2114 	sltu	$25,$3,$25
2115 	daddu	$7,$25
2116 	mflo	($24,$13,$14)
2117 	mfhi	($25,$13,$14)
2118 	daddu	$2,$24
2119 	sltu	$1,$2,$24
2120 	 dmultu	($15,$13)		# forward multiplication: a[3]*a[1]
2121 	daddu	$2,$24
2122 	daddu	$1,$25
2123 	sltu	$24,$2,$24
2124 	daddu	$3,$1
2125 	daddu	$25,$24
2126 	sltu	$1,$3,$1	# carry out of c2 goes to $1 this time ...
2127 	daddu	$3,$25
2128 	daddu	$7,$1		# ... and is added, since c3 is already in use
2129 	sltu	$25,$3,$25
2130 	daddu	$7,$25
2131 	mflo	($24,$15,$13)
2132 	mfhi	($25,$15,$13)
2133 	sd	$2,3*8($4)	# r[3]=c1;
2134 	daddu	$3,$24
2135 	sltu	$1,$3,$24
2136 	 dmultu	($14,$14)		# forward multiplication: a[2]*a[2]
2137 	daddu	$3,$24
2138 	daddu	$1,$25
2139 	sltu	$24,$3,$24
2140 	daddu	$7,$1
2141 	daddu	$25,$24
2142 	sltu	$2,$7,$1	# c1 = carry out of c3 (c1 restarts)
2143 	daddu	$7,$25
2144 	sltu	$25,$7,$25
2145 	daddu	$2,$25
2146 	mflo	($24,$14,$14)
2147 	mfhi	($25,$14,$14)
2148 	daddu	$3,$24
2149 	sltu	$1,$3,$24
2150 	 dmultu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
2151 	daddu	$25,$1
2152 	daddu	$7,$25
2153 	sltu	$1,$7,$25
2154 	daddu	$2,$1
2155 	sd	$3,4*8($4)	# r[4]=c2;
2156 	mflo	($24,$14,$15)
2157 	mfhi	($25,$14,$15)
2158 	daddu	$7,$24
2159 	sltu	$1,$7,$24
2160 	 dmultu	($15,$15)		# forward multiplication: a[3]*a[3]
2161 	daddu	$7,$24
2162 	daddu	$1,$25
2163 	sltu	$24,$7,$24
2164 	daddu	$2,$1
2165 	daddu	$25,$24
2166 	sltu	$3,$2,$1	# c2 = carry out of c1 (c2 restarts)
2167 	daddu	$2,$25
2168 	sltu	$25,$2,$25
2169 	daddu	$3,$25
2170 	mflo	($24,$15,$15)
2171 	mfhi	($25,$15,$15)
2172 	sd	$7,5*8($4)	# r[5]=c3;
2173 
2174 	daddu	$2,$24		# c1 += lo
2175 	sltu	$1,$2,$24	# carry out of c1
2176 	daddu	$25,$1		# hi += carry
2177 	daddu	$3,$25		# c2 += hi; last word, no further carry is tracked
2178 	sd	$2,6*8($4)	# r[6]=c1;
2179 	sd	$3,7*8($4)	# r[7]=c2;
2180 
2181 	.set	noreorder
2182 	jr	$31		# return
2183 	nop			# branch delay slot, written by hand under noreorder
2184 .end	bn_sqr_comba4
2185