// Copyright 2004-2017 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// ADDP is applied to pointer arguments before they are dereferenced
// (always in the form "ADDP rX=0,rY").  When building for HP-UX without
// _LP64 it expands to addp4; in every other configuration it is a plain
// add, which with a zero addend leaves the value unchanged.
#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define	ADDP	addp4
#else
#define	ADDP	add
#endif

// OPENSSL_cpuid_setup: performs no work in this file; it consists of a
// single return to the caller through b0.
.global	OPENSSL_cpuid_setup#
.proc	OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib;	br.ret.sptk.many	b0		};;
.endp	OPENSSL_cpuid_setup#

// OPENSSL_rdtsc: copies the interval time counter (ar.itc) into r8 and
// returns.  No other register is written.
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib;	mov			r8=ar.itc
	br.ret.sptk.many	b0		};;
.endp   OPENSSL_rdtsc#

// OPENSSL_atomic_add: atomically adds r33 to the 32-bit word addressed by
// r32 and returns the value that was stored, sign-extended from 32 bits,
// in r8.
//
// In:    r32 = address of the 32-bit word, r33 = amount to add
// Out:   r8  = new value (sxt4)
// Uses:  r2, r3, p6, ar.ccv
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
// Pass the pointer through ADDP before the first dereference, the same
// way OPENSSL_cleanse and CRYPTO_memcmp treat their pointer arguments.
// Where ADDP is addp4 (HP-UX without _LP64) this builds the full address;
// elsewhere it is an add of zero.  The stop after it lets ld4 see the
// updated r32 within the same bundle.
{ .mmi;	ADDP		r32=0,r32;;
	ld4		r2=[r32]		// first guess at current value
	nop.i		0		};;
.Lspin:
// r2 is the value we expect to find: arm ar.ccv with it, compute the
// replacement in r8 and keep a copy of the expectation in r3.
{ .mii;	mov		ar.ccv=r2
	add		r8=r2,r33
	mov		r3=r2		};;
// cmpxchg4 stores r8 only if the word still equals ar.ccv; r2 receives
// whatever was found in memory.
{ .mmi;	mf;;
	cmpxchg4.acq	r2=[r32],r8,ar.ccv
	nop.i		0		};;
// Found value differs from the expectation: retry, using the value just
// read (r2) as the new expectation.
{ .mib;	cmp.ne		p6,p0=r2,r3
	nop.i		0
(p6)	br.dpnt		.Lspin		};;
{ .mib;	nop.m		0
	sxt4		r8=r8
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_atomic_add#

// OPENSSL_wipe_cpu
//
// Returns a structure comprising pointer to the top of stack of
// the caller (r8 = sp) and pointer beyond backing storage for the
// current register frame (r9). The latter is required, because it
// might be insufficient to wipe backing storage for the current frame
// (as this procedure does), one might have to go further, toward
// higher addresses to reach for whole "retroactively" saved
// context...
//
// What gets zeroed here: the 96 stacked general registers and f32-f127
// (through register rotation in the loop), 96 eight-byte backing-store
// slots starting at ar.bsp, and then r11, r14-r31 and f6-f31 explicitly.
// ar.lc and the predicates are saved on entry (r3, r8) and restored
// before returning.
.global	OPENSSL_wipe_cpu#
.proc	OPENSSL_wipe_cpu#
.align	32
OPENSSL_wipe_cpu:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
// Claim a frame of 96 locals, all of them rotating.
{ .mib;	alloc		r2=ar.pfs,0,96,0,96
	mov		r3=ar.lc
	brp.loop.imp	.L_wipe_top,.L_wipe_end-16
					};;
// br.ctop executes the loop body ar.lc+1 times when ar.ec is 1, so a
// count of 95 gives exactly 96 passes: one per stacked register and one
// per backing-store slot, from bsp+95*8 down to bsp itself.  (A count of
// 96 would add a 97th store at bsp-8, below this frame, and would leave
// the r9 fix-up after the loop 8 bytes short of the documented value.)
{ .mii;	mov		r9=ar.bsp
	mov		r8=pr
	mov		ar.lc=95	};;
	.body
{ .mii;	add		r9=96*8-8,r9		// r9 = last slot of this frame
	mov		ar.ec=1		};;

// One can sweep double as fast, but then we can't guarantee
// that backing storage is wiped...
.L_wipe_top:
// Each pass clears one backing-store slot (walking downward) and the
// registers currently named r127/f127; the rotation done by br.ctop
// brings a different physical register under those names next time.
{ .mfi;	st8		[r9]=r0,-8
	mov		f127=f0
	mov		r127=r0		}
{ .mfb;	nop.m		0
	nop.f		0
	br.ctop.sptk	.L_wipe_top	};;
.L_wipe_end:

// Static registers are cleared one by one.
{ .mfi;	mov		r11=r0
	mov		f6=f0
	mov		r14=r0		}
{ .mfi;	mov		r15=r0
	mov		f7=f0
	mov		r16=r0		}
{ .mfi;	mov		r17=r0
	mov		f8=f0
	mov		r18=r0		}
{ .mfi;	mov		r19=r0
	mov		f9=f0
	mov		r20=r0		}
{ .mfi;	mov		r21=r0
	mov		f10=f0
	mov		r22=r0		}
{ .mfi;	mov		r23=r0
	mov		f11=f0
	mov		r24=r0		}
{ .mfi;	mov		r25=r0
	mov		f12=f0
	mov		r26=r0		}
{ .mfi;	mov		r27=r0
	mov		f13=f0
	mov		r28=r0		}
{ .mfi;	mov		r29=r0
	mov		f14=f0
	mov		r30=r0		}
{ .mfi;	mov		r31=r0
	mov		f15=f0
	nop.i		0		}
{ .mfi;	mov		f16=f0		}
{ .mfi;	mov		f17=f0		}
{ .mfi;	mov		f18=f0		}
{ .mfi;	mov		f19=f0		}
{ .mfi;	mov		f20=f0		}
{ .mfi;	mov		f21=f0		}
{ .mfi;	mov		f22=f0		}
{ .mfi;	mov		f23=f0		}
{ .mfi;	mov		f24=f0		}
{ .mfi;	mov		f25=f0		}
{ .mfi;	mov		f26=f0		}
{ .mfi;	mov		f27=f0		}
{ .mfi;	mov		f28=f0		}
{ .mfi;	mov		f29=f0		}
{ .mfi;	mov		f30=f0		}
// After 96 post-decrements r9 sits at bsp-8; adding 96*8+8 turns it into
// bsp+96*8, the first address past the 96 slots wiped above.
{ .mfi;	add		r9=96*8+8,r9
	mov		f31=f0
	mov		pr=r8,0x1ffff	}
{ .mib;	mov		r8=sp
	mov		ar.lc=r3
	br.ret.sptk	b0		};;
.endp	OPENSSL_wipe_cpu#

// OPENSSL_cleanse: stores zero bytes over the len bytes starting at ptr.
//
// In:    r32 = ptr, r33 = len
// Uses:  r2, p6, p7 (r32 and r33 are consumed as running pointer/count)
//
// Lengths below 15 are cleared one byte at a time (.Little).  Longer
// buffers take the .Lot path: single bytes until the pointer is 8-byte
// aligned, then 8-byte stores (.Laligned), and finally .Little again for
// whatever is left over.
.global	OPENSSL_cleanse#
.proc	OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib;	cmp.eq		p6,p0=0,r33	    // len==0
	ADDP		r32=0,r32
(p6)	br.ret.spnt	b0		};;
{ .mib;	and		r2=7,r32	    // ptr&7, tested at .Lot
	cmp.leu		p6,p0=15,r33	    // len>=15
(p6)	br.cond.dptk	.Lot		};;

// Byte loop.  The comparison reads len before the decrement in the next
// bundle takes effect, so p7 (return) fires on the final byte.
.Little:
{ .mib;	st1		[r32]=r0,1
	cmp.ltu		p6,p7=1,r33	}  // len>1
{ .mbb;	add		r33=-1,r33	   // len--
(p6)	br.cond.dptk	.Little
(p7)	br.ret.sptk.many	b0	};;

// Alignment loop: r2 holds ptr&7 on every entry.
.Lot:
{ .mib;	cmp.eq		p6,p0=0,r2
(p6)	br.cond.dptk	.Laligned	};;
{ .mmi;	st1		[r32]=r0,1;;
	and		r2=7,r32	}   // recompute from advanced ptr
{ .mib;	add		r33=-1,r33
	br		.Lot		};;

// Word loop.  r2 is taken from len before the decrement, so the test
// below keeps looping while at least 8 bytes remain after this store.
.Laligned:
{ .mmi;	st8		[r32]=r0,8
	and		r2=-8,r33	    // len&~7
	add		r33=-8,r33	};; // len-=8
{ .mib;	cmp.ltu		p6,p0=8,r2	    // ((len+8)&~7)>8
(p6)	br.cond.dptk	.Laligned	};;

// Fewer than 8 bytes left: finish them in .Little, or return if none.
{ .mbb;	cmp.eq		p6,p7=r0,r33
(p7)	br.cond.dpnt	.Little
(p6)	br.ret.sptk.many	b0	};;
.endp	OPENSSL_cleanse#

// CRYPTO_memcmp: compares two buffers byte by byte without leaving the
// loop early, so every one of the len byte pairs is always read.
//
// In:    r32 = first buffer, r33 = second buffer, r34 = len
// Out:   r8  = 0 if len is 0 or all pairs are equal, 1 otherwise
// Uses:  r2, r3, r9, r10, r16, r17, p6, the rotating registers r32-r39
//        and rotating predicates; ar.lc and pr are restored on exit.
//
// The loop is software-pipelined over four stages (ar.ec=4):
//   stage 0 (p16)  load one byte from each buffer into r32 and r36
//   stage 2 (p18)  the same bytes, now visible as r34 and r38, are xor-ed
//   stage 3 (p19)  the xor result, now visible as r35, is or-ed into r8
.global	CRYPTO_memcmp#
.proc	CRYPTO_memcmp#
.align	32
.skip	16
CRYPTO_memcmp:
	.prologue
{ .mib;	mov		r8=0
	cmp.eq		p6,p0=0,r34	    // len==0?
(p6)	br.ret.spnt	b0		};;
	.save		ar.pfs,r2
// 3 inputs + 5 locals = 8 registers, all rotating.
{ .mib;	alloc		r2=ar.pfs,3,5,0,8
	.save		ar.lc,r3
	mov		r3=ar.lc
	brp.loop.imp	.Loop_cmp_ctop,.Loop_cmp_cend-16
					}
{ .mib;	sub		r10=r34,r0,1	    // len-1, loop count for ar.lc
	.save		pr,r9
	mov		r9=pr		};;
// The arguments live in rotating registers, so the pointers are copied
// (through ADDP) into static r16/r17 before the loop starts.
{ .mii;	ADDP		r16=0,r32
	mov		ar.lc=r10
	mov		ar.ec=4		}
{ .mib;	ADDP		r17=0,r33
	mov		pr.rot=1<<16	};; // only p16 set: stage 0 active

.Loop_cmp_ctop:
{ .mib;	(p16)	ld1	r32=[r16],1
	(p18)	xor	r34=r34,r38	}
{ .mib;	(p16)	ld1	r36=[r17],1
	(p19)	or	r8=r8,r35
	br.ctop.sptk	.Loop_cmp_ctop	};;
.Loop_cmp_cend:

// Collapse any accumulated difference bits to exactly 1.
{ .mib;	cmp.ne		p6,p0=0,r8
	mov		ar.lc=r3	};;
{ .mib;
(p6)	mov		r8=1
	mov		pr=r9,0x1ffff
	br.ret.sptk.many	b0	};;
.endp	CRYPTO_memcmp#

// OPENSSL_instrument_bus: walks an array of 32-bit words and adds to each
// one the number of ar.itc ticks elapsed since the previous sample.
//
// In:    r32 = out (array of 32-bit words), r33 = cnt
// Out:   r8  = initial cnt minus the remaining cnt
// Uses:  r2, r3, r9 (last tick), r10 (diff), p6, ar.ccv
//
// Every update follows the same sequence: flush the cache line holding
// the word (fc), load it, fence (mf), then cmpxchg4 the sum back.  The
// value returned by cmpxchg4 (r3) is not inspected.
.global	OPENSSL_instrument_bus#
.proc	OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi;	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32	}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8		};;	// lasttick=tick

// One update of the first word with diff==0, performed before the
// timed loop begins.
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;
.Loop:
{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
	add		r33=-1,r33		// --cnt
	add		r32=4,r32	};;	// ++out
{ .mib;	cmp4.ne		p6,p0=0,r33
(p6)	br.cond.dptk	.Loop		};;

{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus#

// OPENSSL_instrument_bus2: like OPENSSL_instrument_bus, but the output
// pointer and cnt advance only when the measured tick difference changes
// from one sample to the next, and the total number of samples is capped
// by max.
//
// In:    r32 = out (array of 32-bit words), r33 = cnt, r34 = max
// Out:   r8  = initial cnt minus the remaining cnt
// Uses:  r2, r3, r9 (last tick), r10 (diff), r11 (last diff), p6, p7,
//        ar.ccv
.global	OPENSSL_instrument_bus2#
.proc	OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi;	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32	}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8		};;	// lasttick=tick

// One update of the first word with diff==0 before sampling starts.
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;

// First real difference, taken ahead of the loop.
{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
.Loop2:
{ .mmi;	mov		r11=r10			// lastdiff=diff
	add		r34=-1,r34	};;	// --max
{ .mmi;	fc		r32;;
	ld4		r8=[r32]
	cmp4.eq		p6,p0=0,r34	};;	// max exhausted?
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
// The word is still updated on the pass where max reaches zero; the
// branch out is taken in the same bundle as that cmpxchg4.
{ .mmb;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
(p6)	br.cond.spnt	.Ldone2		};;

{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	cmp.ne		p6,p0=r10,r11;;		// diff!=lastdiff
(p6)	add		r33=-1,r33	};;	// conditional --cnt
{ .mib;	cmp4.ne		p7,p0=0,r33
(p6)	add		r32=4,r32		// conditional ++out
(p7)	br.cond.dptk	.Loop2		};;
.Ldone2:
{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus2#