1 #include "mips_arch.h"
2 
3 .text
4 
5 .set	noat		# $1 ($at) is used explicitly as scratch below, so the assembler must not borrow it
6 .set	noreorder	# branch delay slots are written by hand unless a region switches back to reorder
7 
# bn_mul_mont: exported entry point, a size gate placed in front of
# bn_mul_mont_internal.
# In:   $9 = word count, compared as a signed value. No other register
#       is read here and no memory is touched.
# Out:  when $9 < 4 or $9 >= 17 the routine returns at once with
#       $2 = 0 and $4 = 0. For 4 <= $9 <= 16 control moves to
#       bn_mul_mont_internal with a plain branch (no link), so that
#       routine returns directly to the caller of bn_mul_mont.
# The code runs in noreorder mode: the instruction that follows each
# branch or jr is its delay slot and executes on both paths.
8 .align	5
9 .globl	bn_mul_mont
10 .ent	bn_mul_mont
11 bn_mul_mont:
12 	slt	$1,$9,4		# $1 = ($9 < 4)
13 	bnez	$1,1f		# too few words: return 0
14 	li	$2,0		# delay slot: $2 = 0 whether or not the branch is taken
15 	slt	$1,$9,17	# $1 = ($9 < 17); original note: on in-order CPU
16 	bnez	$1,bn_mul_mont_internal	# size accepted: continue in the worker
17 	nop			# delay slot: nothing to do
18 1:	jr	$31		# size rejected
19 	li	$4,0		# delay slot: $4 = 0 as well
20 .end	bn_mul_mont
21 
# bn_mul_mont_internal: word-by-word Montgomery multiplication on 64-bit
# words. Reached only through the branch in bn_mul_mont.
#
# Inputs:
#   $4  rp   result vector, written by .Lsub and .Lcopy
#   $5  ap   first operand vector
#   $6  bp   second operand vector, one word consumed per pass
#   $7  np   vector multiplied by the reduction factor and subtracted
#            at the end (the modulus)
#   $8       pointer to a single word. The word is loaded once and
#            multiplied into the low word of each pass to obtain the
#            reduction factor in $23. Presumably the Montgomery n0
#            constant; confirm against the caller.
#   $9  num  word count, turned into a byte count on entry
# Outputs:
#   $2 = 1 and $4 = 1; num words stored through rp.
# Working registers:
#   $10:$11  low:high of the running ap[j]*bi column sum
#   $24:$25  low:high of the running np[j]*m column sum
#   $12,$14  ap[j] and np[j]       $13  bi = bp[i]      $23  m
#   $16:$17  product ap[j]*bi fetched one step ahead
#   $18:$19  product np[j]*m fetched one step ahead
#   $15      cursor into tp        $20  tp word, later &tp[num]
#   $21      i as a byte offset    $22  j as a byte offset
#   $1,$2    carry, borrow and loop-condition scratch
#   $30      frame anchor: the value of $29 right after the prologue
# Stack:
#   14*8 bytes of register save area, then the scratch vector tp
#   (num+1 words are written) whose base is rounded down to a
#   multiple of 4096.
# The parenthesised multiply, mflo and mfhi forms are preprocessor
# macros, presumably supplied by mips_arch.h.
22 .align	5
23 .ent	bn_mul_mont_internal
24 bn_mul_mont_internal:
25 	.frame	$30,14*8,$31
26 	.mask	0x40000000|16711680,-8
# Prologue: reserve 14 slots and save $30 plus $16-$23, the registers
# named by the .mask bits above. The five lowest slots stay unused.
27 	dsubu $29,14*8
28 	sd	$30,(14-1)*8($29)
29 	sd	$23,(14-2)*8($29)
30 	sd	$22,(14-3)*8($29)
31 	sd	$21,(14-4)*8($29)
32 	sd	$20,(14-5)*8($29)
33 	sd	$19,(14-6)*8($29)
34 	sd	$18,(14-7)*8($29)
35 	sd	$17,(14-8)*8($29)
36 	sd	$16,(14-9)*8($29)
37 	move	$30,$29		# keep the frame base; $29 moves again below
38 
# Reorder mode until .L1st: the assembler may schedule these
# instructions and fill delay slots itself.
39 	.set	reorder
40 	ld	$8,0($8)	# replace the pointer in $8 by the word it addresses
41 	ld	$13,0($6)	# bp[0]
42 	ld	$12,0($5)	# ap[0]
43 	ld	$14,0($7)	# np[0]
44 
# Carve tp out of the stack: two extra words plus num*8 bytes, then
# round $29 down to a multiple of 4096.
45 	dsubu $29,2*8	# place for two extra words
46 	sll	$9,3		# num becomes a byte count (8 bytes per word)
47 	li	$1,-4096	# alignment mask
48 	dsubu $29,$9
49 	and	$29,$1
50 
# First pass, using bp[0]. tp is only written in this pass.
#   $10:$11 = ap[0]*bp[0]
#   $23     = low word of $10 * $8          (reduction factor m)
#   $16:$17 = ap[1]*bp[0]
#   $24:$25 = np[0]*m + $10                 (the low word is never stored)
#   $18:$19 = np[1]*m
51 	dmultu	($12,$13)
52 	ld	$17,8($5)	# ap[1]
53 	ld	$19,8($7)	# np[1]
54 	mflo	($10,$12,$13)
55 	mfhi	($11,$12,$13)
56 	dmultu	($10,$8)
57 	mflo	($23,$10,$8)
58 
59 	dmultu	($17,$13)
60 	mflo	($16,$17,$13)
61 	mfhi	($17,$17,$13)
62 
63 	dmultu	($14,$23)
64 	mflo	($24,$14,$23)
65 	mfhi	($25,$14,$23)
66 	dmultu	($19,$23)
67 	daddu	$24,$10
68 	sltu	$1,$24,$10	# carry out of the low-word add
69 	daddu	$25,$1
70 	mflo	($18,$19,$23)
71 	mfhi	($19,$19,$23)
72 
73 	move	$15,$29		# cursor = &tp[0]
74 	li	$22,2*8		# j starts at word 2
75 .align	4
# .L1st: one column per trip. Each trip folds the products fetched on
# the previous trip into the two carry chains, starts the next two
# multiplies, and stores one finished word at the tp cursor.
76 .L1st:
77 	.set	noreorder
78 	daddu $12,$5,$22
79 	daddu $14,$7,$22
80 	ld	$12,($12)	# ap[j]
81 	ld	$14,($14)	# np[j]
82 
83 	dmultu	($12,$13)
84 	daddu	$10,$16,$11	# ap chain: fetched low + previous high
85 	daddu	$24,$18,$25	# np chain: fetched low + previous high
86 	sltu	$1,$10,$11	# carry of the ap chain add
87 	sltu	$2,$24,$25	# carry of the np chain add
88 	daddu	$11,$17,$1	# new ap chain high
89 	daddu	$25,$19,$2	# new np chain high
90 	mflo	($16,$12,$13)
91 	mfhi	($17,$12,$13)
92 
93 	daddu	$24,$10		# np chain low += ap chain low
94 	sltu	$1,$24,$10
95 	dmultu	($14,$23)
96 	daddu	$25,$1
97 	addu	$22,8		# j advances one word
98 	sd	$24,($15)	# store the finished word
99 	sltu	$2,$22,$9	# j still below num?
100 	mflo	($18,$14,$23)
101 	mfhi	($19,$14,$23)
102 
103 	bnez	$2,.L1st
104 	daddu $15,8		# delay slot: advance the tp cursor
105 	.set	reorder
106 
# Drain after .L1st: fold the last fetched products, then store the
# final three words of this pass at cursor+0, cursor+8 and cursor+16.
107 	daddu	$10,$16,$11
108 	sltu	$1,$10,$11
109 	daddu	$11,$17,$1
110 
111 	daddu	$24,$18,$25
112 	sltu	$2,$24,$25
113 	daddu	$25,$19,$2
114 	daddu	$24,$10
115 	sltu	$1,$24,$10
116 	daddu	$25,$1
117 
118 	sd	$24,($15)
119 
120 	daddu	$25,$11		# top word = np chain high + ap chain high
121 	sltu	$1,$25,$11	# carry out of the top word
122 	sd	$25,8($15)
123 	sd	$1,2*8($15)	# overflow word holds that carry
124 
125 	li	$21,8		# i starts at word 1
126 .align	4
# .Louter: one pass per remaining word of bp. Same structure as the
# first pass, except that every column also adds the tp word left by
# the previous pass.
127 .Louter:
128 	daddu $13,$6,$21
129 	ld	$13,($13)	# bi = bp[i]
130 	ld	$12,($5)	# ap[0]
131 	ld	$17,8($5)	# ap[1]
132 	ld	$20,($29)	# tp[0]
133 
134 	dmultu	($12,$13)
135 	ld	$14,($7)	# np[0]
136 	ld	$19,8($7)	# np[1]
137 	mflo	($10,$12,$13)
138 	mfhi	($11,$12,$13)
139 	daddu	$10,$20		# $10:$11 = ap[0]*bi + tp[0]
# The reduction factor m for this pass is the low word of $10 * $8.
140 	dmultu	($10,$8)
141 	sltu	$1,$10,$20	# carry of the tp[0] add
142 	daddu	$11,$1
143 	mflo	($23,$10,$8)
144 
145 	dmultu	($17,$13)
146 	mflo	($16,$17,$13)
147 	mfhi	($17,$17,$13)
148 
149 	dmultu	($14,$23)
150 	mflo	($24,$14,$23)
151 	mfhi	($25,$14,$23)
152 
153 	dmultu	($19,$23)
154 	daddu	$24,$10		# low word of np[0]*m + $10; never stored
155 	sltu	$1,$24,$10
156 	daddu	$25,$1
157 	mflo	($18,$19,$23)
158 	mfhi	($19,$19,$23)
159 
160 	move	$15,$29		# cursor = &tp[0]
161 	li	$22,2*8		# j starts at word 2
162 	ld	$20,8($15)	# tp[1], consumed by the first trip of .Linner
163 .align	4
# .Linner: as .L1st, with the tp word read one step ahead ($20) added
# into the ap chain before the two chains are combined.
164 .Linner:
165 	.set	noreorder
166 	daddu $12,$5,$22
167 	daddu $14,$7,$22
168 	ld	$12,($12)	# ap[j]
169 	ld	$14,($14)	# np[j]
170 
171 	dmultu	($12,$13)
172 	daddu	$10,$16,$11	# ap chain: fetched low + previous high
173 	daddu	$24,$18,$25	# np chain: fetched low + previous high
174 	sltu	$1,$10,$11
175 	sltu	$2,$24,$25
176 	daddu	$11,$17,$1
177 	daddu	$25,$19,$2
178 	mflo	($16,$12,$13)
179 	mfhi	($17,$12,$13)
180 
181 	daddu	$10,$20		# ap chain low += tp word from the previous pass
182 	addu	$22,8		# j advances one word
183 	dmultu	($14,$23)
184 	sltu	$1,$10,$20	# carry of the tp add
185 	daddu	$24,$10		# np chain low += ap chain low
186 	daddu	$11,$1
187 	sltu	$2,$24,$10
188 	ld	$20,2*8($15)	# read the tp word needed on the next trip
189 	daddu	$25,$2
190 	sltu	$1,$22,$9	# j still below num?
191 	mflo	($18,$14,$23)
192 	mfhi	($19,$14,$23)
193 	sd	$24,($15)	# store the finished word
194 	bnez	$1,.Linner
195 	daddu $15,8		# delay slot: advance the tp cursor
196 	.set	reorder
197 
# Drain after .Linner: as after .L1st, plus the two remaining tp words
# of the previous pass.
198 	daddu	$10,$16,$11
199 	sltu	$1,$10,$11
200 	daddu	$11,$17,$1
201 	daddu	$10,$20
202 	sltu	$2,$10,$20
203 	daddu	$11,$2
204 
205 	ld	$20,2*8($15)	# overflow word left by the previous pass
206 	daddu	$24,$18,$25
207 	sltu	$1,$24,$25
208 	daddu	$25,$19,$1
209 	daddu	$24,$10
210 	sltu	$2,$24,$10
211 	daddu	$25,$2
212 	sd	$24,($15)
213 
214 	daddu	$24,$25,$11	# top word = np chain high + ap chain high
215 	sltu	$25,$24,$11	# $25 now collects the carries
216 	daddu	$24,$20		# plus the previous overflow word
217 	sltu	$1,$24,$20
218 	daddu	$25,$1
219 	sd	$24,8($15)
220 	sd	$25,2*8($15)	# new overflow word; $25 is read again after .Lsub
221 
222 	addu	$21,8		# i advances one word
223 	sltu	$2,$21,$9	# i still below num?
224 	bnez	$2,.Louter	# reorder mode: the assembler fills this delay slot
225 
# Final reduction. Delay slots are hand-filled again from here on.
226 	.set	noreorder
227 	daddu $20,$29,$9	# &tp[num]
228 	move	$15,$29		# cursor = &tp[0]
229 	move	$5,$29		# $5 is not read again in this routine
230 	li	$11,0		# clear borrow bit
231 
232 .align	4
# .Lsub: rp[i] = tp[i] - np[i] - borrow for every word below &tp[num].
# The borrow out of each word is carried in $11.
233 .Lsub:	ld	$10,($15)
234 	ld	$24,($7)
235 	daddu $15,8
236 	daddu $7,8
237 	dsubu	$24,$10,$24	# tp[i]-np[i]
238 	sgtu	$1,$24,$10	# borrow of that subtract
239 	dsubu	$10,$24,$11	# minus the incoming borrow
240 	sgtu	$11,$10,$24	# borrow of the second subtract
241 	sd	$10,($4)
242 	or	$11,$1		# combined borrow for the next word
243 	sltu	$1,$15,$20	# cursor still below &tp[num]?
244 	bnez	$1,.Lsub
245 	daddu $4,8		# delay slot: next rp word
246 
# $11 = overflow word - final borrow, used as a select mask with its
# complement in $25. Zero keeps the difference already stored in rp;
# all ones copies tp over it. The select assumes $11 is one of these
# two values here.
247 	dsubu	$11,$25,$11	# handle upmost overflow bit
248 	move	$15,$29		# cursor = &tp[0]
249 	dsubu $4,$9	# restore rp
250 	not	$25,$11
251 
# .Lcopy: branch-free select, rp[i] = (tp[i] AND $11) OR (rp[i] AND $25).
# Each tp word that has been read is overwritten with zero.
252 .Lcopy:	ld	$14,($15)	# conditional move
253 	ld	$12,($4)
254 	sd	$0,($15)	# clear the tp word just read
255 	daddu $15,8
256 	and	$14,$11
257 	and	$12,$25
258 	or	$12,$14
259 	sltu	$1,$15,$20	# cursor still below &tp[num]?
260 	sd	$12,($4)
261 	bnez	$1,.Lcopy
262 	daddu $4,8		# delay slot: next rp word
263 
264 	li	$4,1
265 	li	$2,1		# both $2 and $4 report 1
266 
# Epilogue: $30 still holds the post-prologue $29, so the move releases
# the aligned tp area; then the saved registers are reloaded.
267 	.set	noreorder
268 	move	$29,$30
269 	ld	$30,(14-1)*8($29)
270 	ld	$23,(14-2)*8($29)
271 	ld	$22,(14-3)*8($29)
272 	ld	$21,(14-4)*8($29)
273 	ld	$20,(14-5)*8($29)
274 	ld	$19,(14-6)*8($29)
275 	ld	$18,(14-7)*8($29)
276 	ld	$17,(14-8)*8($29)
277 	ld	$16,(14-9)*8($29)
278 	jr	$31
279 	daddu $29,14*8		# delay slot: release the save area
280 .end	bn_mul_mont_internal
# Identification string emitted into read-only data. It carries no
# label and nothing in the code above reads it.
281 .rdata
282 .asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"
283