#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov, @dot-asm, initially for use with OpenSSL.
# ====================================================================
#
# ChaCha20 for Itanium.
#
# March 2019
#
# Itanium 9xxx, which has a pair of shifters, manages to process one byte
# in 9.3 cycles. This aligns perfectly with the theoretical estimate.
# On the other hand, pre-9000 CPUs have a single shifter and each extr/dep
# pair below takes an additional cycle. Then the final input->xor->output
# pass runs slower than expected... Overall result is 15.6 cpb, two
# cycles more than the theoretical estimate.

# The last command-line argument, if any, names the output file.  With no
# argument (or a false one such as "" or "0") the generated assembly goes
# to whatever STDOUT already is.  A failed open is fatal: carrying on would
# silently send the assembly to the wrong place.
if ($output = pop) {
	open STDOUT, ">", $output or die "can't open $output: $!";
}

# Register allocation used by every heredoc below.
#   @k  r16-r31  input block: 4 constants, 8 key words, 4 counter/nonce words
#   @x  r38-r53  working state, re-seeded from @k for every 64-byte block
#   @y  r8-r11   scratch for rotates and for the shifted key-stream bytes
#   @z  r15,r35-r37  byte mask (@z[0]) and in-flight input/output bytes;
#                @z[1] and @z[2] are the same registers as $key and
#                $counter, which are dead once the state has been loaded
my @k = map("r$_",(16..31));
my @x = map("r$_",(38..53));
my @y = map("r$_",(8..11));
my @z = map("r$_",(15,35..37));
# The five stacked input registers r32-r36 (alloc below declares 5 inputs).
my ($out,$inp,$len,$key,$counter) = map("r$_",(32..36));

# Entry point, state load and head of the per-block loop.
#
# ADDP is addp4 only for HP-UX ILP32 builds and a plain add elsewhere, so
# the "ADDP r=0,r" lines are no-ops on LP64 targets.  All four pointer
# arguments are passed through it: $key and $counter before the ld4
# sequence, $out and $inp before the ld1/st1 pass that dereferences them.
#
# The key and the counter block are each read through two pointers 4 bytes
# apart, both post-incremented by 8, so two words are fetched per group.
# @k[11] and @k[15] serve as the second pointer first and are then
# overwritten by the last load through them.
#
# @x[0..14] are seeded from @k here; @x[15] is seeded at .Loop_outer, and
# on later passes @x[0..14] are restored inside the xor pass.
#
# .Loop_outer arms a 10-iteration counted loop (ar.lc=9), sets p6 when
# 64 >= len (unsigned) and leaves 64-len in @z[1] for the byte mask.
$code.=<<___;
#if defined(_HPUX_SOURCE)
# if !defined(_LP64)
#  define ADDP  addp4
# else
#  define ADDP  add
# endif
#else
# define ADDP   add
#endif

.text

.global	ChaCha20_ctr32#
.proc	ChaCha20_ctr32#
.align	32
ChaCha20_ctr32:
	.prologue
	.save		ar.pfs,r2
{ .mmi;	alloc		r2=ar.pfs,5,17,0,0
	ADDP		@k[11]=4,$key
	.save		ar.lc,r3
	mov		r3=ar.lc		}
{ .mmi;	ADDP		$out=0,$out		// widen data pointers as well
	ADDP		$inp=0,$inp		}
{ .mmi;	ADDP		$key=0,$key
	ADDP		$counter=0,$counter
	.save		pr,r14
	mov		r14=pr			};;

	.body
{ .mlx;	ld4		@k[4]=[$key],8
	movl		@k[0]=0x61707865	}
{ .mlx;	ld4		@k[5]=[@k[11]],8
	movl		@k[1]=0x3320646e	};;
{ .mlx;	ld4		@k[6]=[$key],8
	movl		@k[2]=0x79622d32	}
{ .mlx;	ld4		@k[7]=[@k[11]],8
	movl		@k[3]=0x6b206574	};;
{ .mmi;	ld4		@k[8]=[$key],8
	ld4		@k[9]=[@k[11]],8
	add		@k[15]=4,$counter	};;
{ .mmi;	ld4		@k[10]=[$key]
	ld4		@k[11]=[@k[11]]
	mov		@x[0]=@k[0]		};;
{ .mmi;	ld4		@k[12]=[$counter],8
	ld4		@k[13]=[@k[15]],8
	mov		@x[1]=@k[1]		};;
{ .mmi;	ld4		@k[14]=[$counter]
	ld4		@k[15]=[@k[15]]
	mov		@x[2]=@k[2]		}
{ .mmi;	mov		@x[3]=@k[3]
	mov		@x[4]=@k[4]
	mov		@x[5]=@k[5]		};;
{ .mmi;	mov		@x[6]=@k[6]
	mov		@x[7]=@k[7]
	mov		@x[8]=@k[8]		}
{ .mmi;	mov		@x[9]=@k[9]
	mov		@x[10]=@k[10]
	mov		@x[11]=@k[11]		}
{ .mmi;	mov		@x[12]=@k[12]
	mov		@x[13]=@k[13]
	mov		@x[14]=@k[14]		};;

.Loop_outer:
{ .mii;	mov		@x[15]=@k[15]
	mov		ar.lc=9
	mov		ar.ec=1			}
{ .mmb;	cmp.geu		p6,p0=64,$len
	sub		@z[1]=64,$len
	brp.loop.imp	.Loop_top,.Loop_end-16	};;

.Loop_top:
___
# ROUND(a0,b0,c0,d0) appends one full ChaCha round to $code: four
# quarter-rounds, interleaved so that independent lanes share bundles.
#
# The arguments are the @x indices used by the first quarter-round.  The
# remaining three index sets are derived by moving every index to the next
# slot inside its own group of four, wrapping around.  ROUND(0,4,8,12)
# therefore produces the column round and ROUND(0,5,10,15) the diagonal
# round.
#
# State words are 32-bit values held in 64-bit registers, so rotates are
# synthesised: <<<16 is extr.u + dep, while <<<12, <<<8 and <<<7 are
# extr.u + dep.z + or, with @y[0..3] as scratch.  Instructions written
# with an extra leading space belong to the b/c half of a quarter-round;
# the spacing is purely a visual aid.
#
# The closing bundle, which completes the <<<7 of lanes 2 and 3, is only
# emitted for d0==12 (where it also sets @z[0] to all-ones, the seed of
# the byte mask) and for d0==15 (where it also carries the br.ctop back to
# .Loop_top).  Calling ROUND with any other d0 leaves the round unfinished.
sub ROUND {
my ($a0,$b0,$c0,$d0)=@_;
my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));
my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));

$code.=<<___;
{ .mmi;	add		@x[$a0]=@x[$a0],@x[$b0]
	add		@x[$a1]=@x[$a1],@x[$b1]
	add		@x[$a2]=@x[$a2],@x[$b2]		};;
{ .mmi;	add		@x[$a3]=@x[$a3],@x[$b3]
	xor		@x[$d0]=@x[$d0],@x[$a0]
	xor		@x[$d1]=@x[$d1],@x[$a1]		};;
{ .mmi;	xor		@x[$d2]=@x[$d2],@x[$a2]
	xor		@x[$d3]=@x[$d3],@x[$a3]
	extr.u		@y[0]=@x[$d0],16,16		};;
{ .mii;	extr.u		@y[1]=@x[$d1],16,16
	dep		@x[$d0]=@x[$d0],@y[0],16,16	};;
{ .mii;	 add		@x[$c0]=@x[$c0],@x[$d0]
	extr.u		@y[2]=@x[$d2],16,16
	dep		@x[$d1]=@x[$d1],@y[1],16,16	};;
{ .mii;	 add		@x[$c1]=@x[$c1],@x[$d1]
	 xor		@x[$b0]=@x[$b0],@x[$c0]
	extr.u		@y[3]=@x[$d3],16,16		};;
{ .mii;	 xor		@x[$b1]=@x[$b1],@x[$c1]
	dep		@x[$d2]=@x[$d2],@y[2],16,16
	dep		@x[$d3]=@x[$d3],@y[3],16,16	};;
{ .mmi;	 add		@x[$c2]=@x[$c2],@x[$d2]
	 add		@x[$c3]=@x[$c3],@x[$d3]
	 extr.u		@y[0]=@x[$b0],20,12		};;
{ .mmi;	 xor		@x[$b2]=@x[$b2],@x[$c2]
	 xor		@x[$b3]=@x[$b3],@x[$c3]
	 dep.z		@x[$b0]=@x[$b0],12,20		};;
{ .mii;	 or		@x[$b0]=@x[$b0],@y[0]
	 extr.u		@y[1]=@x[$b1],20,12
	 dep.z		@x[$b1]=@x[$b1],12,20		};;
{ .mii;	add		@x[$a0]=@x[$a0],@x[$b0]
	 extr.u		@y[2]=@x[$b2],20,12
	 extr.u		@y[3]=@x[$b3],20,12		}
{ .mii;	 or		@x[$b1]=@x[$b1],@y[1]
	 dep.z		@x[$b2]=@x[$b2],12,20
	 dep.z		@x[$b3]=@x[$b3],12,20		};;
{ .mmi;	 or		@x[$b2]=@x[$b2],@y[2]
	 or		@x[$b3]=@x[$b3],@y[3]
	add		@x[$a1]=@x[$a1],@x[$b1]		};;
{ .mmi;	add		@x[$a2]=@x[$a2],@x[$b2]
	add		@x[$a3]=@x[$a3],@x[$b3]
	xor		@x[$d0]=@x[$d0],@x[$a0]		};;
{ .mii;	xor		@x[$d1]=@x[$d1],@x[$a1]
	extr.u		@y[0]=@x[$d0],24,8
	dep.z		@x[$d0]=@x[$d0],8,24		};;
{ .mii;	or		@x[$d0]=@x[$d0],@y[0]
	extr.u		@y[1]=@x[$d1],24,8
	dep.z		@x[$d1]=@x[$d1],8,24		};;
{ .mmi;	or		@x[$d1]=@x[$d1],@y[1]
	xor		@x[$d2]=@x[$d2],@x[$a2]
	xor		@x[$d3]=@x[$d3],@x[$a3]		};;
{ .mii;	 add		@x[$c0]=@x[$c0],@x[$d0]
	extr.u		@y[2]=@x[$d2],24,8
	dep.z		@x[$d2]=@x[$d2],8,24		};;
{ .mii;	 xor		@x[$b0]=@x[$b0],@x[$c0]
	extr.u		@y[3]=@x[$d3],24,8
	dep.z		@x[$d3]=@x[$d3],8,24		};;
{ .mmi;	or		@x[$d2]=@x[$d2],@y[2]
	or		@x[$d3]=@x[$d3],@y[3]
	 extr.u		@y[0]=@x[$b0],25,7		};;
{ .mmi;	 add		@x[$c1]=@x[$c1],@x[$d1]
	 add		@x[$c2]=@x[$c2],@x[$d2]
	 dep.z		@x[$b0]=@x[$b0],7,25		};;
{ .mmi;	 xor		@x[$b1]=@x[$b1],@x[$c1]
	 xor		@x[$b2]=@x[$b2],@x[$c2]
	 add		@x[$c3]=@x[$c3],@x[$d3]		};;
{ .mii;	 xor		@x[$b3]=@x[$b3],@x[$c3]
	 extr.u		@y[1]=@x[$b1],25,7
	 dep.z		@x[$b1]=@x[$b1],7,25		};;
{ .mii;	 or		@x[$b0]=@x[$b0],@y[0]
	 extr.u		@y[2]=@x[$b2],25,7
	 dep.z		@x[$b2]=@x[$b2],7,25		};;
{ .mii;	 or		@x[$b1]=@x[$b1],@y[1]
	 extr.u		@y[3]=@x[$b3],25,7
	 dep.z		@x[$b3]=@x[$b3],7,25		};;
___
$code.=<<___		if ($d0 == 12);
{ .mmi;	 or		@x[$b2]=@x[$b2],@y[2]
	 or		@x[$b3]=@x[$b3],@y[3]
	mov		@z[0]=-1			};;
___
$code.=<<___		if ($d0 == 15);
{ .mmb;	 or		@x[$b2]=@x[$b2],@y[2]
	 or		@x[$b3]=@x[$b3],@y[3]
	br.ctop.sptk	.Loop_top			};;
___
}
# Loop body: one column round followed by one diagonal round.  With
# ar.lc=9 the br.ctop emitted by the second call runs the pair ten times.
	&ROUND(0, 4, 8, 12);
	&ROUND(0, 5, 10, 15);

# After the rounds:
#  - add the input block @k back into @x to form the key stream;
#  - when p6 is set (64 >= len) shift the all-ones value in @z[0] right by
#    64-len, leaving one set bit per remaining byte; otherwise it stays
#    all-ones;
#  - advance the block counter @k[12] for the next pass;
#  - load the predicate file from that mask, so that predicate N guards
#    byte N of the block in the xor pass.
# The three bundles at the end start the software-pipelined byte pass:
# first loads, plus the shifted copies of @x[0] for bytes 1..3.
$code.=<<___;
.Loop_end:

{ .mmi;	add		@x[0]=@x[0],@k[0]
	add		@x[1]=@x[1],@k[1]
(p6)	shr.u		@z[0]=@z[0],@z[1]		}
{ .mmb;	add		@x[2]=@x[2],@k[2]
	add		@x[3]=@x[3],@k[3]
	clrrrb.pr					};;
{ .mmi;	add		@x[4]=@x[4],@k[4]
	add		@x[5]=@x[5],@k[5]
	add		@x[6]=@x[6],@k[6]		}
{ .mmi;	add		@x[7]=@x[7],@k[7]
	add		@x[8]=@x[8],@k[8]
	add		@x[9]=@x[9],@k[9]		}
{ .mmi;	add		@x[10]=@x[10],@k[10]
	add		@x[11]=@x[11],@k[11]
	add		@x[12]=@x[12],@k[12]		}
{ .mmi;	add		@x[13]=@x[13],@k[13]
	add		@x[14]=@x[14],@k[14]
	add		@x[15]=@x[15],@k[15]		}
{ .mmi;	add		@k[12]=1,@k[12]			// next counter
	mov		pr=@z[0],0x1ffff		};;

//////////////////////////////////////////////////////////////////
// Each predicate bit corresponds to byte to be processed. Note
// that p0 is wired to 1, but it works out, because there always
// is at least one byte to process...
{ .mmi;	(p0)	ld1		@z[0]=[$inp],1
		shr.u		@y[1]=@x[0],8		};;
{ .mmi;	(p1)	ld1		@z[1]=[$inp],1
	(p2)	shr.u		@y[2]=@x[0],16		};;
{ .mmi;	(p2)	ld1		@z[2]=[$inp],1
	(p0)	xor		@z[0]=@z[0],@x[0]
	(p3)	shr.u		@y[3]=@x[0],24		};;
___
# Byte-wise input ^ key stream -> output pass, generated four bytes at a
# time for bytes 0..59 (the last four bytes are emitted after the loop).
#
# For each step $i0 is the index of the byte being stored first, $i1..$i7
# are the following byte indices and $k is the index of the next key-stream
# word.  Every load, xor and store is guarded by the predicate with the
# same number as the byte it handles, so a short final block stops early.
# The pass is software-pipelined: while bytes $i0..$i3 are xored and
# stored, bytes $i3..$i6 are loaded and the shifted copies of @x[$k] are
# prepared in @y[1..3].
#
# "(p1) mov @x[$k-1]=@k[$k-1]" re-seeds a state word that is no longer
# needed, ready for the next 64-byte block.  In the first step p1 is still
# the byte-1 predicate; the same step then redefines p1/p2 as
# "64 < len" / "64 >= len", which is what p1 means in all later steps.
for(my $i0=0; $i0<60; $i0+=4) {
my ($i1, $i2, $i3, $i4, $i5, $i6, $i7) = map($i0+$_,(1..7));
my $k = $i0/4+1;

$code.=<<___;
{ .mmi;	(p$i3)	ld1		@z[3]=[$inp],1
	(p$i0)	st1		[$out]=@z[0],1
	(p$i1)	xor		@z[1]=@z[1],@y[1]	};;
{ .mmi;	(p$i4)	ld1		@z[0]=[$inp],1
	(p$i5)	shr.u		@y[1]=@x[$k],8		}
{ .mmi;	(p$i1)	st1		[$out]=@z[1],1
	(p$i2)	xor		@z[2]=@z[2],@y[2]
	(p1)	mov		@x[$k-1]=@k[$k-1]	};;
{ .mfi;	(p$i5)	ld1		@z[1]=[$inp],1
	(p$i6)	shr.u		@y[2]=@x[$k],16		}
{ .mfi;	(p$i2)	st1		[$out]=@z[2],1
	(p$i3)	xor		@z[3]=@z[3],@y[3]	};;
{ .mfi;	(p$i6)	ld1		@z[2]=[$inp],1
	(p$i7)	shr.u		@y[3]=@x[$k],24		}
___
$code.=<<___	if ($i0==0);	# p1,p2 are available for reuse in first round
{ .mmi;	(p$i3)	st1		[$out]=@z[3],1
	(p$i4)	xor		@z[0]=@z[0],@x[$k]
		cmp.ltu		p1,p2=64,$len		};;
___
$code.=<<___	if ($i0>0);
{ .mfi;	(p$i3)	st1		[$out]=@z[3],1
	(p$i4)	xor		@z[0]=@z[0],@x[$k]	};;
___
}
# Drain the byte pipeline (bytes 60..63), then either loop or leave.
#  - p1 (64 < len): subtract 64 from len and branch back to .Loop_outer.
#  - p2 (64 >= len): this was the last block, so restore the caller's
#    ar.lc from r3.
# On exit the key, counter and nonce words @k[4..15] are zeroed, the
# caller's predicates are restored from r14 and control returns via b0.
$code.=<<___;
{ .mmi;	(p63)	ld1		@z[3]=[$inp],1
	(p60)	st1		[$out]=@z[0],1
	(p61)	xor		@z[1]=@z[1],@y[1]	};;
{ .mmi;	(p61)	st1		[$out]=@z[1],1
	(p62)	xor		@z[2]=@z[2],@y[2]	};;
{ .mmi;	(p62)	st1		[$out]=@z[2],1
	(p63)	xor		@z[3]=@z[3],@y[3]
	(p2)	mov		ar.lc=r3		};;
{ .mib;	(p63)	st1		[$out]=@z[3],1
	(p1)	add		$len=-64,$len
(p1)	br.dptk.many		.Loop_outer		};;

{ .mmi;	mov			@k[4]=0			// wipe key material
	mov			@k[5]=0
	mov			@k[6]=0			}
{ .mmi;	mov			@k[7]=0
	mov			@k[8]=0
	mov			@k[9]=0			}
{ .mmi;	mov			@k[10]=0
	mov			@k[11]=0
	mov			@k[12]=0		}
{ .mmi;	mov			@k[13]=0
	mov			@k[14]=0
	mov			@k[15]=0		}
{ .mib;	mov			pr=r14,0x1ffff
	br.ret.sptk.many	b0			};;
.endp	ChaCha20_ctr32#
stringz "ChaCha20 for IA64, CRYPTOGAMS by \@dot-asm"
___

# Emit the accumulated assembly; a failed flush or close is fatal so that
# a truncated output file is never mistaken for a good one.
print $code;
close STDOUT or die "error closing STDOUT: $!";