1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
4  *
5  * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
6  */
7 
8 #include <linux/linkage.h>
9 
10 .file "des3_ede-asm_64.S"
11 .text
12 
/*
 * Lookup-table bases.  Only the first table is named directly (.L_s1);
 * s2..s8 are formed by adding multiples of 64*8 bytes to it.  This relies
 * on the tables in .rodata being stored back to back, in order, with 64
 * quadword entries each.
 */
13 #define s1 .L_s1
14 #define s2 ((s1) + (64*8))
15 #define s3 ((s2) + (64*8))
16 #define s4 ((s3) + (64*8))
17 #define s5 ((s4) + (64*8))
18 #define s6 ((s5) + (64*8))
19 #define s7 ((s6) + (64*8))
20 #define s8 ((s7) + (64*8))
21 
22 /* register macros */
/*
 * Naming: a bare name is the 64-bit register, the `d` suffix its 32-bit
 * view, and `bl` / `bh` its lowest byte and second-lowest byte.
 */
/* CTX: first argument; both entry points document it as the round keys. */
23 #define CTX %rdi
24 
/*
 * RLn / RRn: the two data halves of block n (n = 0..2).  The 1-way path
 * only keeps data in RL0/RR0 and borrows RL1 as a scratch index register
 * inside round1.
 */
25 #define RL0 %r8
26 #define RL1 %r9
27 #define RL2 %r10
28 
29 #define RL0d %r8d
30 #define RL1d %r9d
31 #define RL2d %r10d
32 
33 #define RR0 %r11
34 #define RR1 %r12
35 #define RR2 %r13
36 
37 #define RR0d %r11d
38 #define RR1d %r12d
39 #define RR2d %r13d
40 
/*
 * RWn: work registers.  They receive (round key ^ data half) and are then
 * consumed two bytes at a time through the bl/bh views, which is why they
 * are mapped to registers that have an addressable second byte
 * (%ah/%bh/%ch).
 */
41 #define RW0 %rax
42 #define RW1 %rbx
43 #define RW2 %rcx
44 
45 #define RW0d %eax
46 #define RW1d %ebx
47 #define RW2d %ecx
48 
49 #define RW0bl %al
50 #define RW1bl %bl
51 #define RW2bl %cl
52 
53 #define RW0bh %ah
54 #define RW1bh %bh
55 #define RW2bh %ch
56 
/*
 * RTn: temporaries.  RT1 and RT3 alias %rsi and %rdx, the registers that
 * carry the dst and src arguments on entry, so both entry points save dst
 * on the stack and finish reading src before the first macro that writes
 * RT1/RT3.
 */
57 #define RT0 %r15
58 #define RT1 %rsi
59 #define RT2 %r14
60 #define RT3 %rdx
61 
62 #define RT0d %r15d
63 #define RT1d %esi
64 #define RT2d %r14d
65 #define RT3d %edx
66 
67 /***********************************************************************
68  * 1-way 3DES
69  ***********************************************************************/
/*
 * do_permutation(a, b, offset, mask)
 *
 * Exchanges the bits selected by `mask` between b and (a >> offset):
 *	t  = ((a >> offset) ^ b) & mask;
 *	b ^= t;
 *	a ^= t << offset;
 * a and b are 32-bit register operands; offset and mask are immediates.
 * Clobbers RT0 (written through RT0d, so its upper half is cleared) and
 * the flags.
 */
70 #define do_permutation(a, b, offset, mask) \
71 	movl a, RT0d; \
72 	shrl $(offset), RT0d; \
73 	xorl b, RT0d; \
74 	andl $(mask), RT0d; \
75 	xorl RT0d, b; \
76 	shll $(offset), RT0d; \
77 	xorl RT0d, a;
78 
/*
 * expand_to_64bits(val, mask)
 *
 *	val = (val | ((u64)ror32(low32(val), 4) << 32)) & mask
 *
 * The low half is kept and a copy of it, rotated right by 4 bits, is
 * placed in the upper half; both halves are then masked.  The rotated copy
 * is ORed in, so the upper 32 bits of val are expected to be zero on entry
 * (true when val##d was last written by a 32-bit instruction).
 * `val` is a 64-bit register name, `mask` a 64-bit register.
 * Clobbers RT0 and the flags.
 */
79 #define expand_to_64bits(val, mask) \
80 	movl val##d, RT0d; \
81 	rorl $4, RT0d; \
82 	shlq $32, RT0; \
83 	orq RT0, val; \
84 	andq mask, val;
85 
/*
 * compress_to_64bits(val)
 *
 *	low32(val) |= rol32(val >> 32, 4)
 *
 * Folds the upper half of val back into the lower half, undoing the
 * 4-bit rotation applied by expand_to_64bits.  The result is written
 * through val##d, so the upper 32 bits of val end up zero.
 * Clobbers RT0 and the flags.
 */
86 #define compress_to_64bits(val) \
87 	movq val, RT0; \
88 	shrq $32, RT0; \
89 	roll $4, RT0d; \
90 	orl RT0d, val##d;
91 
/*
 * initial_permutation(left, right)
 *
 * Input transform for one block whose two 32-bit words sit in left##d and
 * right##d (`left` / `right` are 64-bit register names):
 *   1. four do_permutation bit exchanges (offsets 4, 16, 2, 8);
 *   2. right = rol32(right, 1); exchange the 0xaaaaaaaa bits between left
 *      and right; left = rol32(left, 1);
 *   3. expand both halves to the 64-bit working form using the mask
 *      0x3f3f3f3f3f3f3f3f (six bits kept per byte).
 * Clobbers RT0, RW0, RT3 (left holding the mask) and the flags.  RT3 is
 * %rdx, so the source pointer must no longer be needed when this runs.
 */
92 #define initial_permutation(left, right) \
93 	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
94 	do_permutation(left##d, right##d, 16, 0x0000ffff); \
95 	do_permutation(right##d, left##d,  2, 0x33333333); \
96 	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
97 	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
98 	movl left##d, RW0d; \
99 	roll $1, right##d; \
100 	xorl right##d, RW0d; \
101 	andl $0xaaaaaaaa, RW0d; \
102 	xorl RW0d, left##d; \
103 	xorl RW0d, right##d; \
104 	roll $1, left##d; \
105 	expand_to_64bits(right, RT3); \
106 	expand_to_64bits(left, RT3);
107 
/*
 * final_permutation(left, right)
 *
 * Output transform for one block; it performs the steps of
 * initial_permutation(left, right) undone in reverse order:
 *   1. compress both halves back to 32 bits;
 *   2. left = ror32(left, 1); exchange the 0xaaaaaaaa bits between left
 *      and right; right = ror32(right, 1);
 *   3. four do_permutation bit exchanges (offsets 8, 2, 16, 4).
 * The results are left in left##d / right##d.
 * Clobbers RT0, RW0 and the flags.
 */
108 #define final_permutation(left, right) \
109 	compress_to_64bits(right); \
110 	compress_to_64bits(left); \
111 	movl right##d, RW0d; \
112 	rorl $1, left##d; \
113 	xorl left##d, RW0d; \
114 	andl $0xaaaaaaaa, RW0d; \
115 	xorl RW0d, right##d; \
116 	xorl RW0d, left##d; \
117 	rorl $1, right##d; \
118 	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
119 	do_permutation(right##d, left##d,  2, 0x33333333); \
120 	do_permutation(left##d, right##d, 16, 0x0000ffff); \
121 	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
122 
/*
 * round1(n, from, to, load_next_key)
 *
 * One round for a single block.  On entry RW0 must hold round key n.
 *	RW0 ^= from;
 *	to  ^= s8[b0] ^ s6[b1] ^ s4[b2] ^ s2[b3] ^
 *	       s7[b4] ^ s5[b5] ^ s3[b6] ^ s1[b7];	(bN = byte N of RW0)
 * The eight lookups are split over two XOR chains, one accumulating in RT0
 * and one directly in `to`; they are merged near the end.
 * load_next_key(n, RW0) is expanded once the last bytes of RW0 have been
 * extracted, so RW0 holds key n+1 for the following round; pass dummy2 to
 * skip that load.
 * The tables are addressed with absolute displacements plus a scaled
 * index (no base register).
 * Clobbers RW0, RT0-RT3, RL1 (borrowed as a fifth index register, so this
 * macro cannot be used while RL1 holds block data) and the flags.
 * The last line of the body ends in a continuation, so the blank line
 * after it belongs to the definition.
 */
123 #define round1(n, from, to, load_next_key) \
124 	xorq from, RW0; \
125 	\
126 	movzbl RW0bl, RT0d; \
127 	movzbl RW0bh, RT1d; \
128 	shrq $16, RW0; \
129 	movzbl RW0bl, RT2d; \
130 	movzbl RW0bh, RT3d; \
131 	shrq $16, RW0; \
132 	movq s8(, RT0, 8), RT0; \
133 	xorq s6(, RT1, 8), to; \
134 	movzbl RW0bl, RL1d; \
135 	movzbl RW0bh, RT1d; \
136 	shrl $16, RW0d; \
137 	xorq s4(, RT2, 8), RT0; \
138 	xorq s2(, RT3, 8), to; \
139 	movzbl RW0bl, RT2d; \
140 	movzbl RW0bh, RT3d; \
141 	xorq s7(, RL1, 8), RT0; \
142 	xorq s5(, RT1, 8), to; \
143 	xorq s3(, RT2, 8), RT0; \
144 	load_next_key(n, RW0); \
145 	xorq RT0, to; \
146 	xorq s1(, RT3, 8), to; \
147 
/*
 * load_next_key(n, RWx): load the 8-byte round key with index n+1, i.e.
 * the quadword at CTX + (n+1)*8, into RWx.
 */
148 #define load_next_key(n, RWx) \
149 	movq (((n) + 1) * 8)(CTX), RWx;
150 
/*
 * dummy2(a, b): two-argument macro that expands to nothing.  It is passed
 * in place of load_next_key (and of the do_movq argument of round3) for
 * the last round, so that no key past the last one is loaded.
 */
151 #define dummy2(a, b) /*_*/
152 
/*
 * read_block(io, left, right): load the 32-bit words at io and io+4 into
 * left##d and right##d and byte-swap each of them.  `io` is a register
 * holding the address; `left` / `right` are 64-bit register names.
 */
153 #define read_block(io, left, right) \
154 	movl    (io), left##d; \
155 	movl   4(io), right##d; \
156 	bswapl left##d; \
157 	bswapl right##d;
158 
/*
 * write_block(io, left, right): byte-swap left##d and right##d and store
 * them at io and io+4.  The swap is done in place, so both registers are
 * modified.
 */
159 #define write_block(io, left, right) \
160 	bswapl left##d; \
161 	bswapl right##d; \
162 	movl   left##d,   (io); \
163 	movl   right##d, 4(io);
164 
/*
 * des3_ede_x86_64_crypt_blk
 *
 * Processes one 8-byte block: reads 8 bytes from src, runs 48 rounds
 * (three groups of 16) and writes 8 bytes to dst.  Round keys are read as
 * 48 consecutive quadwords starting at CTX (indices 0..47).
 *
 * The callee-saved registers %rbx and %r12-%r15 are saved and restored.
 * dst is kept on the stack for the duration because %rsi doubles as RT1.
 */
165 SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
166 	/* input:
167 	 *	%rdi: round keys, CTX
168 	 *	%rsi: dst
169 	 *	%rdx: src
170 	 */
171 	pushq %rbx;
172 	pushq %r12;
173 	pushq %r13;
174 	pushq %r14;
175 	pushq %r15;
176 
177 	pushq %rsi; /* dst */
178 
	/* src (%rdx) must be read before initial_permutation overwrites RT3 */
179 	read_block(%rdx, RL0, RR0);
180 	initial_permutation(RL0, RR0);
181 
	/* prime RW0 with round key 0; each round then loads the next key */
182 	movq (CTX), RW0;
183 
	/* rounds 0..15: from/to alternate, starting with from=RR0, to=RL0 */
184 	round1(0, RR0, RL0, load_next_key);
185 	round1(1, RL0, RR0, load_next_key);
186 	round1(2, RR0, RL0, load_next_key);
187 	round1(3, RL0, RR0, load_next_key);
188 	round1(4, RR0, RL0, load_next_key);
189 	round1(5, RL0, RR0, load_next_key);
190 	round1(6, RR0, RL0, load_next_key);
191 	round1(7, RL0, RR0, load_next_key);
192 	round1(8, RR0, RL0, load_next_key);
193 	round1(9, RL0, RR0, load_next_key);
194 	round1(10, RR0, RL0, load_next_key);
195 	round1(11, RL0, RR0, load_next_key);
196 	round1(12, RR0, RL0, load_next_key);
197 	round1(13, RL0, RR0, load_next_key);
198 	round1(14, RR0, RL0, load_next_key);
199 	round1(15, RL0, RR0, load_next_key);
200 
	/*
	 * rounds 16..31: same alternation but starting with from=RL0, i.e.
	 * the halves trade roles between groups without any move instruction
	 */
201 	round1(16+0, RL0, RR0, load_next_key);
202 	round1(16+1, RR0, RL0, load_next_key);
203 	round1(16+2, RL0, RR0, load_next_key);
204 	round1(16+3, RR0, RL0, load_next_key);
205 	round1(16+4, RL0, RR0, load_next_key);
206 	round1(16+5, RR0, RL0, load_next_key);
207 	round1(16+6, RL0, RR0, load_next_key);
208 	round1(16+7, RR0, RL0, load_next_key);
209 	round1(16+8, RL0, RR0, load_next_key);
210 	round1(16+9, RR0, RL0, load_next_key);
211 	round1(16+10, RL0, RR0, load_next_key);
212 	round1(16+11, RR0, RL0, load_next_key);
213 	round1(16+12, RL0, RR0, load_next_key);
214 	round1(16+13, RR0, RL0, load_next_key);
215 	round1(16+14, RL0, RR0, load_next_key);
216 	round1(16+15, RR0, RL0, load_next_key);
217 
	/* rounds 32..47: starting with from=RR0 again */
218 	round1(32+0, RR0, RL0, load_next_key);
219 	round1(32+1, RL0, RR0, load_next_key);
220 	round1(32+2, RR0, RL0, load_next_key);
221 	round1(32+3, RL0, RR0, load_next_key);
222 	round1(32+4, RR0, RL0, load_next_key);
223 	round1(32+5, RL0, RR0, load_next_key);
224 	round1(32+6, RR0, RL0, load_next_key);
225 	round1(32+7, RL0, RR0, load_next_key);
226 	round1(32+8, RR0, RL0, load_next_key);
227 	round1(32+9, RL0, RR0, load_next_key);
228 	round1(32+10, RR0, RL0, load_next_key);
229 	round1(32+11, RL0, RR0, load_next_key);
230 	round1(32+12, RR0, RL0, load_next_key);
231 	round1(32+13, RL0, RR0, load_next_key);
232 	round1(32+14, RR0, RL0, load_next_key);
	/* last round: dummy2 suppresses the load of a non-existent key 48 */
233 	round1(32+15, RL0, RR0, dummy2);
234 
	/* output uses the halves in swapped order relative to the input */
235 	final_permutation(RR0, RL0);
236 
237 	popq %rsi /* dst */
238 	write_block(%rsi, RR0, RL0);
239 
240 	popq %r15;
241 	popq %r14;
242 	popq %r13;
243 	popq %r12;
244 	popq %rbx;
245 
246 	RET;
247 SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
248 
249 /***********************************************************************
250  * 3-way 3DES
251  ***********************************************************************/
/*
 * expand_to_64bits(val, mask) - repeated here with the same body as the
 * definition in the 1-way section.
 *
 *	val = (val | ((u64)ror32(low32(val), 4) << 32)) & mask
 *
 * The rotated copy is ORed in, so the upper 32 bits of val are expected
 * to be zero on entry.  Clobbers RT0 and the flags.
 */
252 #define expand_to_64bits(val, mask) \
253 	movl val##d, RT0d; \
254 	rorl $4, RT0d; \
255 	shlq $32, RT0; \
256 	orq RT0, val; \
257 	andq mask, val;
258 
/*
 * compress_to_64bits(val) - repeated here with the same body as the
 * definition in the 1-way section.
 *
 *	low32(val) |= rol32(val >> 32, 4)
 *
 * The result is written through val##d, so the upper 32 bits of val end
 * up zero.  Clobbers RT0 and the flags.
 */
259 #define compress_to_64bits(val) \
260 	movq val, RT0; \
261 	shrq $32, RT0; \
262 	roll $4, RT0d; \
263 	orl RT0d, val##d;
264 
/*
 * initial_permutation3(left, right)
 *
 * Three-block version of the input transform.  `left` and `right` are
 * register-name prefixes (the caller passes RL and RR); the block number
 * 0/1/2 and the `d` suffix are pasted on to form the operand names.
 * For each block n:
 *   1. four do_permutation bit exchanges (offsets 4, 16, 2, 8);
 *   2. right = rol32(right, 1); exchange the 0xaaaaaaaa bits between left
 *      and right (RWn is the temporary); left = rol32(left, 1);
 *   3. expand both halves to the 64-bit working form using the mask
 *      0x3f3f3f3f3f3f3f3f.
 * The indentation levels mark which block a line belongs to.
 * Clobbers RT0, RT3 (left holding the mask), RW0-RW2 and the flags.  RT3
 * is %rdx, so the source pointer must no longer be needed when this runs.
 */
265 #define initial_permutation3(left, right) \
266 	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
267 	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
268 	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
269 	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
270 	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
271 	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
272 	    \
273 	do_permutation(right##0d, left##0d,  2, 0x33333333); \
274 	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
275 	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
276 	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
277 	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
278 	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
279 	    \
280 	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
281 	    \
282 	movl left##0d, RW0d; \
283 	roll $1, right##0d; \
284 	xorl right##0d, RW0d; \
285 	andl $0xaaaaaaaa, RW0d; \
286 	xorl RW0d, left##0d; \
287 	xorl RW0d, right##0d; \
288 	roll $1, left##0d; \
289 	expand_to_64bits(right##0, RT3); \
290 	expand_to_64bits(left##0, RT3); \
291 	  movl left##1d, RW1d; \
292 	  roll $1, right##1d; \
293 	  xorl right##1d, RW1d; \
294 	  andl $0xaaaaaaaa, RW1d; \
295 	  xorl RW1d, left##1d; \
296 	  xorl RW1d, right##1d; \
297 	  roll $1, left##1d; \
298 	  expand_to_64bits(right##1, RT3); \
299 	  expand_to_64bits(left##1, RT3); \
300 	    movl left##2d, RW2d; \
301 	    roll $1, right##2d; \
302 	    xorl right##2d, RW2d; \
303 	    andl $0xaaaaaaaa, RW2d; \
304 	    xorl RW2d, left##2d; \
305 	    xorl RW2d, right##2d; \
306 	    roll $1, left##2d; \
307 	    expand_to_64bits(right##2, RT3); \
308 	    expand_to_64bits(left##2, RT3);
309 
/*
 * final_permutation3(left, right)
 *
 * Three-block version of the output transform.  `left` and `right` are
 * register-name prefixes; the block number 0/1/2 and the `d` suffix are
 * pasted on to form the operand names.  For each block n:
 *   1. compress both halves back to 32 bits;
 *   2. left = ror32(left, 1); exchange the 0xaaaaaaaa bits between left
 *      and right (RWn is the temporary); right = ror32(right, 1);
 *   3. four do_permutation bit exchanges (offsets 8, 2, 16, 4).
 * These are the steps of initial_permutation3(left, right) undone in
 * reverse order.
 * Clobbers RT0, RW0-RW2 and the flags.
 */
310 #define final_permutation3(left, right) \
311 	compress_to_64bits(right##0); \
312 	compress_to_64bits(left##0); \
313 	movl right##0d, RW0d; \
314 	rorl $1, left##0d; \
315 	xorl left##0d, RW0d; \
316 	andl $0xaaaaaaaa, RW0d; \
317 	xorl RW0d, right##0d; \
318 	xorl RW0d, left##0d; \
319 	rorl $1, right##0d; \
320 	  compress_to_64bits(right##1); \
321 	  compress_to_64bits(left##1); \
322 	  movl right##1d, RW1d; \
323 	  rorl $1, left##1d; \
324 	  xorl left##1d, RW1d; \
325 	  andl $0xaaaaaaaa, RW1d; \
326 	  xorl RW1d, right##1d; \
327 	  xorl RW1d, left##1d; \
328 	  rorl $1, right##1d; \
329 	    compress_to_64bits(right##2); \
330 	    compress_to_64bits(left##2); \
331 	    movl right##2d, RW2d; \
332 	    rorl $1, left##2d; \
333 	    xorl left##2d, RW2d; \
334 	    andl $0xaaaaaaaa, RW2d; \
335 	    xorl RW2d, right##2d; \
336 	    xorl RW2d, left##2d; \
337 	    rorl $1, right##2d; \
338 	    \
339 	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
340 	do_permutation(right##0d, left##0d,  2, 0x33333333); \
341 	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
342 	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
343 	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
344 	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
345 	    \
346 	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
347 	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
348 	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
349 	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
350 	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
351 	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
352 
/*
 * round3(n, from, to, load_next_key, do_movq)
 *
 * One round applied to three blocks.  `from` and `to` are register-name
 * prefixes (RL / RR); the block number 0/1/2 is pasted on.  On entry RW0,
 * RW1 and RW2 must all hold round key n.  For each block i:
 *	RWi   ^= from##i;
 *	to##i ^= s8[b0] ^ s6[b1] ^ s4[b2] ^ s2[b3] ^
 *	         s7[b4] ^ s5[b5] ^ s3[b6] ^ s1[b7];	(bN = byte N of RWi)
 * Unlike round1, every lookup is XORed straight into to##i, with RT3 and
 * RT1 as the only index registers.
 *
 * Key hand-over: load_next_key(n, RW0) puts key n+1 into RW0 once block 0
 * has extracted its last bytes; do_movq(RW0, RW1) and do_movq(RW0, RW2)
 * then copy it into RW1 / RW2 at the same point of blocks 1 and 2.  Pass
 * dummy2 for both macro arguments to skip the load and the copies.
 *
 * The tables are addressed with absolute displacements plus a scaled
 * index (no base register).
 * Clobbers RW0-RW2, RT1, RT3 and the flags.
 */
353 #define round3(n, from, to, load_next_key, do_movq) \
354 	xorq from##0, RW0; \
355 	movzbl RW0bl, RT3d; \
356 	movzbl RW0bh, RT1d; \
357 	shrq $16, RW0; \
358 	xorq s8(, RT3, 8), to##0; \
359 	xorq s6(, RT1, 8), to##0; \
360 	movzbl RW0bl, RT3d; \
361 	movzbl RW0bh, RT1d; \
362 	shrq $16, RW0; \
363 	xorq s4(, RT3, 8), to##0; \
364 	xorq s2(, RT1, 8), to##0; \
365 	movzbl RW0bl, RT3d; \
366 	movzbl RW0bh, RT1d; \
367 	shrl $16, RW0d; \
368 	xorq s7(, RT3, 8), to##0; \
369 	xorq s5(, RT1, 8), to##0; \
370 	movzbl RW0bl, RT3d; \
371 	movzbl RW0bh, RT1d; \
372 	load_next_key(n, RW0); \
373 	xorq s3(, RT3, 8), to##0; \
374 	xorq s1(, RT1, 8), to##0; \
375 		xorq from##1, RW1; \
376 		movzbl RW1bl, RT3d; \
377 		movzbl RW1bh, RT1d; \
378 		shrq $16, RW1; \
379 		xorq s8(, RT3, 8), to##1; \
380 		xorq s6(, RT1, 8), to##1; \
381 		movzbl RW1bl, RT3d; \
382 		movzbl RW1bh, RT1d; \
383 		shrq $16, RW1; \
384 		xorq s4(, RT3, 8), to##1; \
385 		xorq s2(, RT1, 8), to##1; \
386 		movzbl RW1bl, RT3d; \
387 		movzbl RW1bh, RT1d; \
388 		shrl $16, RW1d; \
389 		xorq s7(, RT3, 8), to##1; \
390 		xorq s5(, RT1, 8), to##1; \
391 		movzbl RW1bl, RT3d; \
392 		movzbl RW1bh, RT1d; \
393 		do_movq(RW0, RW1); \
394 		xorq s3(, RT3, 8), to##1; \
395 		xorq s1(, RT1, 8), to##1; \
396 			xorq from##2, RW2; \
397 			movzbl RW2bl, RT3d; \
398 			movzbl RW2bh, RT1d; \
399 			shrq $16, RW2; \
400 			xorq s8(, RT3, 8), to##2; \
401 			xorq s6(, RT1, 8), to##2; \
402 			movzbl RW2bl, RT3d; \
403 			movzbl RW2bh, RT1d; \
404 			shrq $16, RW2; \
405 			xorq s4(, RT3, 8), to##2; \
406 			xorq s2(, RT1, 8), to##2; \
407 			movzbl RW2bl, RT3d; \
408 			movzbl RW2bh, RT1d; \
409 			shrl $16, RW2d; \
410 			xorq s7(, RT3, 8), to##2; \
411 			xorq s5(, RT1, 8), to##2; \
412 			movzbl RW2bl, RT3d; \
413 			movzbl RW2bh, RT1d; \
414 			do_movq(RW0, RW2); \
415 			xorq s3(, RT3, 8), to##2; \
416 			xorq s1(, RT1, 8), to##2;
417 
/*
 * __movq(src, dst): a plain 64-bit move wrapped in a macro so that it can
 * be handed to round3 as its do_movq argument (dummy2 being the no-op
 * alternative).
 */
418 #define __movq(src, dst) \
419 	movq src, dst;
420 
/*
 * des3_ede_x86_64_crypt_blk_3way
 *
 * Processes three consecutive 8-byte blocks in one pass: reads 24 bytes
 * from src, runs 48 rounds (three groups of 16) on all three blocks in
 * step and writes 24 bytes to dst.  Round keys are read as 48 consecutive
 * quadwords starting at CTX (indices 0..47).
 *
 * The callee-saved registers %rbx and %r12-%r15 are saved and restored.
 * dst is kept on the stack for the duration because %rsi doubles as RT1.
 */
421 SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
422 	/* input:
423 	 *	%rdi: ctx, round keys
424 	 *	%rsi: dst (3 blocks)
425 	 *	%rdx: src (3 blocks)
426 	 */
427 
428 	pushq %rbx;
429 	pushq %r12;
430 	pushq %r13;
431 	pushq %r14;
432 	pushq %r15;
433 
434 	pushq %rsi /* dst */
435 
436 	/* load input */
	/* (all of src is read here, before RT3 = %rdx is overwritten below) */
437 	movl 0 * 4(%rdx), RL0d;
438 	movl 1 * 4(%rdx), RR0d;
439 	movl 2 * 4(%rdx), RL1d;
440 	movl 3 * 4(%rdx), RR1d;
441 	movl 4 * 4(%rdx), RL2d;
442 	movl 5 * 4(%rdx), RR2d;
443 
	/* byte-swap every 32-bit input word */
444 	bswapl RL0d;
445 	bswapl RR0d;
446 	bswapl RL1d;
447 	bswapl RR1d;
448 	bswapl RL2d;
449 	bswapl RR2d;
450 
451 	initial_permutation3(RL, RR);
452 
	/* prime all three work registers with round key 0 */
453 	movq 0(CTX), RW0;
454 	movq RW0, RW1;
455 	movq RW0, RW2;
456 
	/* rounds 0..15: from/to alternate, starting with from=RR, to=RL */
457 	round3(0, RR, RL, load_next_key, __movq);
458 	round3(1, RL, RR, load_next_key, __movq);
459 	round3(2, RR, RL, load_next_key, __movq);
460 	round3(3, RL, RR, load_next_key, __movq);
461 	round3(4, RR, RL, load_next_key, __movq);
462 	round3(5, RL, RR, load_next_key, __movq);
463 	round3(6, RR, RL, load_next_key, __movq);
464 	round3(7, RL, RR, load_next_key, __movq);
465 	round3(8, RR, RL, load_next_key, __movq);
466 	round3(9, RL, RR, load_next_key, __movq);
467 	round3(10, RR, RL, load_next_key, __movq);
468 	round3(11, RL, RR, load_next_key, __movq);
469 	round3(12, RR, RL, load_next_key, __movq);
470 	round3(13, RL, RR, load_next_key, __movq);
471 	round3(14, RR, RL, load_next_key, __movq);
472 	round3(15, RL, RR, load_next_key, __movq);
473 
	/*
	 * rounds 16..31: same alternation but starting with from=RL, i.e.
	 * the halves trade roles between groups without any move instruction
	 */
474 	round3(16+0, RL, RR, load_next_key, __movq);
475 	round3(16+1, RR, RL, load_next_key, __movq);
476 	round3(16+2, RL, RR, load_next_key, __movq);
477 	round3(16+3, RR, RL, load_next_key, __movq);
478 	round3(16+4, RL, RR, load_next_key, __movq);
479 	round3(16+5, RR, RL, load_next_key, __movq);
480 	round3(16+6, RL, RR, load_next_key, __movq);
481 	round3(16+7, RR, RL, load_next_key, __movq);
482 	round3(16+8, RL, RR, load_next_key, __movq);
483 	round3(16+9, RR, RL, load_next_key, __movq);
484 	round3(16+10, RL, RR, load_next_key, __movq);
485 	round3(16+11, RR, RL, load_next_key, __movq);
486 	round3(16+12, RL, RR, load_next_key, __movq);
487 	round3(16+13, RR, RL, load_next_key, __movq);
488 	round3(16+14, RL, RR, load_next_key, __movq);
489 	round3(16+15, RR, RL, load_next_key, __movq);
490 
	/* rounds 32..47: starting with from=RR again */
491 	round3(32+0, RR, RL, load_next_key, __movq);
492 	round3(32+1, RL, RR, load_next_key, __movq);
493 	round3(32+2, RR, RL, load_next_key, __movq);
494 	round3(32+3, RL, RR, load_next_key, __movq);
495 	round3(32+4, RR, RL, load_next_key, __movq);
496 	round3(32+5, RL, RR, load_next_key, __movq);
497 	round3(32+6, RR, RL, load_next_key, __movq);
498 	round3(32+7, RL, RR, load_next_key, __movq);
499 	round3(32+8, RR, RL, load_next_key, __movq);
500 	round3(32+9, RL, RR, load_next_key, __movq);
501 	round3(32+10, RR, RL, load_next_key, __movq);
502 	round3(32+11, RL, RR, load_next_key, __movq);
503 	round3(32+12, RR, RL, load_next_key, __movq);
504 	round3(32+13, RL, RR, load_next_key, __movq);
505 	round3(32+14, RR, RL, load_next_key, __movq);
	/* last round: no further key to load or to copy to RW1/RW2 */
506 	round3(32+15, RL, RR, dummy2, dummy2);
507 
	/* output uses the halves in swapped order relative to the input */
508 	final_permutation3(RR, RL);
509 
510 	bswapl RR0d;
511 	bswapl RL0d;
512 	bswapl RR1d;
513 	bswapl RL1d;
514 	bswapl RR2d;
515 	bswapl RL2d;
516 
517 	popq %rsi /* dst */
518 	movl RR0d, 0 * 4(%rsi);
519 	movl RL0d, 1 * 4(%rsi);
520 	movl RR1d, 2 * 4(%rsi);
521 	movl RL1d, 3 * 4(%rsi);
522 	movl RR2d, 4 * 4(%rsi);
523 	movl RL2d, 5 * 4(%rsi);
524 
525 	popq %r15;
526 	popq %r14;
527 	popq %r13;
528 	popq %r12;
529 	popq %rbx;
530 
531 	RET;
532 SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
533 
/*
 * Lookup tables for round1/round3.
 *
 * Eight tables (.L_s1 .. .L_s8) of 64 quadwords (64*8 bytes) each.  The
 * code only names .L_s1 and reaches the others through the s2..s8 offset
 * macros, so the tables must stay contiguous, in this order and at this
 * size.  An entry is selected by one byte of (round key ^ data half).
 * NOTE(review): presumably every such byte is below 64, because the data
 * halves are masked to six bits per byte and the table entries keep that
 * form - this also requires the round keys to use the same layout; confirm
 * against the key setup code, which is not part of this file.
 */
534 .section	.rodata, "a", @progbits
535 .align 16
/* Table 1 (macro s1): 64 quadword entries. */
536 .L_s1:
537 	.quad 0x0010100001010400, 0x0000000000000000
538 	.quad 0x0000100000010000, 0x0010100001010404
539 	.quad 0x0010100001010004, 0x0000100000010404
540 	.quad 0x0000000000000004, 0x0000100000010000
541 	.quad 0x0000000000000400, 0x0010100001010400
542 	.quad 0x0010100001010404, 0x0000000000000400
543 	.quad 0x0010000001000404, 0x0010100001010004
544 	.quad 0x0010000001000000, 0x0000000000000004
545 	.quad 0x0000000000000404, 0x0010000001000400
546 	.quad 0x0010000001000400, 0x0000100000010400
547 	.quad 0x0000100000010400, 0x0010100001010000
548 	.quad 0x0010100001010000, 0x0010000001000404
549 	.quad 0x0000100000010004, 0x0010000001000004
550 	.quad 0x0010000001000004, 0x0000100000010004
551 	.quad 0x0000000000000000, 0x0000000000000404
552 	.quad 0x0000100000010404, 0x0010000001000000
553 	.quad 0x0000100000010000, 0x0010100001010404
554 	.quad 0x0000000000000004, 0x0010100001010000
555 	.quad 0x0010100001010400, 0x0010000001000000
556 	.quad 0x0010000001000000, 0x0000000000000400
557 	.quad 0x0010100001010004, 0x0000100000010000
558 	.quad 0x0000100000010400, 0x0010000001000004
559 	.quad 0x0000000000000400, 0x0000000000000004
560 	.quad 0x0010000001000404, 0x0000100000010404
561 	.quad 0x0010100001010404, 0x0000100000010004
562 	.quad 0x0010100001010000, 0x0010000001000404
563 	.quad 0x0010000001000004, 0x0000000000000404
564 	.quad 0x0000100000010404, 0x0010100001010400
565 	.quad 0x0000000000000404, 0x0010000001000400
566 	.quad 0x0010000001000400, 0x0000000000000000
567 	.quad 0x0000100000010004, 0x0000100000010400
568 	.quad 0x0000000000000000, 0x0010100001010004
/*
 * Table 2: 64 quadword entries.  The code visible here does not use this
 * label; it addresses the table as s2 = .L_s1 + 1*64*8, so its position
 * right after table 1 is what matters.
 */
569 .L_s2:
570 	.quad 0x0801080200100020, 0x0800080000000000
571 	.quad 0x0000080000000000, 0x0001080200100020
572 	.quad 0x0001000000100000, 0x0000000200000020
573 	.quad 0x0801000200100020, 0x0800080200000020
574 	.quad 0x0800000200000020, 0x0801080200100020
575 	.quad 0x0801080000100000, 0x0800000000000000
576 	.quad 0x0800080000000000, 0x0001000000100000
577 	.quad 0x0000000200000020, 0x0801000200100020
578 	.quad 0x0001080000100000, 0x0001000200100020
579 	.quad 0x0800080200000020, 0x0000000000000000
580 	.quad 0x0800000000000000, 0x0000080000000000
581 	.quad 0x0001080200100020, 0x0801000000100000
582 	.quad 0x0001000200100020, 0x0800000200000020
583 	.quad 0x0000000000000000, 0x0001080000100000
584 	.quad 0x0000080200000020, 0x0801080000100000
585 	.quad 0x0801000000100000, 0x0000080200000020
586 	.quad 0x0000000000000000, 0x0001080200100020
587 	.quad 0x0801000200100020, 0x0001000000100000
588 	.quad 0x0800080200000020, 0x0801000000100000
589 	.quad 0x0801080000100000, 0x0000080000000000
590 	.quad 0x0801000000100000, 0x0800080000000000
591 	.quad 0x0000000200000020, 0x0801080200100020
592 	.quad 0x0001080200100020, 0x0000000200000020
593 	.quad 0x0000080000000000, 0x0800000000000000
594 	.quad 0x0000080200000020, 0x0801080000100000
595 	.quad 0x0001000000100000, 0x0800000200000020
596 	.quad 0x0001000200100020, 0x0800080200000020
597 	.quad 0x0800000200000020, 0x0001000200100020
598 	.quad 0x0001080000100000, 0x0000000000000000
599 	.quad 0x0800080000000000, 0x0000080200000020
600 	.quad 0x0800000000000000, 0x0801000200100020
601 	.quad 0x0801080200100020, 0x0001080000100000
/*
 * Table 3: 64 quadword entries.  The code visible here does not use this
 * label; it addresses the table as s3 = .L_s1 + 2*64*8, so its position
 * right after table 2 is what matters.
 */
602 .L_s3:
603 	.quad 0x0000002000000208, 0x0000202008020200
604 	.quad 0x0000000000000000, 0x0000200008020008
605 	.quad 0x0000002008000200, 0x0000000000000000
606 	.quad 0x0000202000020208, 0x0000002008000200
607 	.quad 0x0000200000020008, 0x0000000008000008
608 	.quad 0x0000000008000008, 0x0000200000020000
609 	.quad 0x0000202008020208, 0x0000200000020008
610 	.quad 0x0000200008020000, 0x0000002000000208
611 	.quad 0x0000000008000000, 0x0000000000000008
612 	.quad 0x0000202008020200, 0x0000002000000200
613 	.quad 0x0000202000020200, 0x0000200008020000
614 	.quad 0x0000200008020008, 0x0000202000020208
615 	.quad 0x0000002008000208, 0x0000202000020200
616 	.quad 0x0000200000020000, 0x0000002008000208
617 	.quad 0x0000000000000008, 0x0000202008020208
618 	.quad 0x0000002000000200, 0x0000000008000000
619 	.quad 0x0000202008020200, 0x0000000008000000
620 	.quad 0x0000200000020008, 0x0000002000000208
621 	.quad 0x0000200000020000, 0x0000202008020200
622 	.quad 0x0000002008000200, 0x0000000000000000
623 	.quad 0x0000002000000200, 0x0000200000020008
624 	.quad 0x0000202008020208, 0x0000002008000200
625 	.quad 0x0000000008000008, 0x0000002000000200
626 	.quad 0x0000000000000000, 0x0000200008020008
627 	.quad 0x0000002008000208, 0x0000200000020000
628 	.quad 0x0000000008000000, 0x0000202008020208
629 	.quad 0x0000000000000008, 0x0000202000020208
630 	.quad 0x0000202000020200, 0x0000000008000008
631 	.quad 0x0000200008020000, 0x0000002008000208
632 	.quad 0x0000002000000208, 0x0000200008020000
633 	.quad 0x0000202000020208, 0x0000000000000008
634 	.quad 0x0000200008020008, 0x0000202000020200
/*
 * Table 4: 64 quadword entries.  The code visible here does not use this
 * label; it addresses the table as s4 = .L_s1 + 3*64*8, so its position
 * right after table 3 is what matters.
 */
635 .L_s4:
636 	.quad 0x1008020000002001, 0x1000020800002001
637 	.quad 0x1000020800002001, 0x0000000800000000
638 	.quad 0x0008020800002000, 0x1008000800000001
639 	.quad 0x1008000000000001, 0x1000020000002001
640 	.quad 0x0000000000000000, 0x0008020000002000
641 	.quad 0x0008020000002000, 0x1008020800002001
642 	.quad 0x1000000800000001, 0x0000000000000000
643 	.quad 0x0008000800000000, 0x1008000000000001
644 	.quad 0x1000000000000001, 0x0000020000002000
645 	.quad 0x0008000000000000, 0x1008020000002001
646 	.quad 0x0000000800000000, 0x0008000000000000
647 	.quad 0x1000020000002001, 0x0000020800002000
648 	.quad 0x1008000800000001, 0x1000000000000001
649 	.quad 0x0000020800002000, 0x0008000800000000
650 	.quad 0x0000020000002000, 0x0008020800002000
651 	.quad 0x1008020800002001, 0x1000000800000001
652 	.quad 0x0008000800000000, 0x1008000000000001
653 	.quad 0x0008020000002000, 0x1008020800002001
654 	.quad 0x1000000800000001, 0x0000000000000000
655 	.quad 0x0000000000000000, 0x0008020000002000
656 	.quad 0x0000020800002000, 0x0008000800000000
657 	.quad 0x1008000800000001, 0x1000000000000001
658 	.quad 0x1008020000002001, 0x1000020800002001
659 	.quad 0x1000020800002001, 0x0000000800000000
660 	.quad 0x1008020800002001, 0x1000000800000001
661 	.quad 0x1000000000000001, 0x0000020000002000
662 	.quad 0x1008000000000001, 0x1000020000002001
663 	.quad 0x0008020800002000, 0x1008000800000001
664 	.quad 0x1000020000002001, 0x0000020800002000
665 	.quad 0x0008000000000000, 0x1008020000002001
666 	.quad 0x0000000800000000, 0x0008000000000000
667 	.quad 0x0000020000002000, 0x0008020800002000
/*
 * Table 5: 64 quadword entries.  The code visible here does not use this
 * label; it addresses the table as s5 = .L_s1 + 4*64*8, so its position
 * right after table 4 is what matters.
 */
668 .L_s5:
669 	.quad 0x0000001000000100, 0x0020001002080100
670 	.quad 0x0020000002080000, 0x0420001002000100
671 	.quad 0x0000000000080000, 0x0000001000000100
672 	.quad 0x0400000000000000, 0x0020000002080000
673 	.quad 0x0400001000080100, 0x0000000000080000
674 	.quad 0x0020001002000100, 0x0400001000080100
675 	.quad 0x0420001002000100, 0x0420000002080000
676 	.quad 0x0000001000080100, 0x0400000000000000
677 	.quad 0x0020000002000000, 0x0400000000080000
678 	.quad 0x0400000000080000, 0x0000000000000000
679 	.quad 0x0400001000000100, 0x0420001002080100
680 	.quad 0x0420001002080100, 0x0020001002000100
681 	.quad 0x0420000002080000, 0x0400001000000100
682 	.quad 0x0000000000000000, 0x0420000002000000
683 	.quad 0x0020001002080100, 0x0020000002000000
684 	.quad 0x0420000002000000, 0x0000001000080100
685 	.quad 0x0000000000080000, 0x0420001002000100
686 	.quad 0x0000001000000100, 0x0020000002000000
687 	.quad 0x0400000000000000, 0x0020000002080000
688 	.quad 0x0420001002000100, 0x0400001000080100
689 	.quad 0x0020001002000100, 0x0400000000000000
690 	.quad 0x0420000002080000, 0x0020001002080100
691 	.quad 0x0400001000080100, 0x0000001000000100
692 	.quad 0x0020000002000000, 0x0420000002080000
693 	.quad 0x0420001002080100, 0x0000001000080100
694 	.quad 0x0420000002000000, 0x0420001002080100
695 	.quad 0x0020000002080000, 0x0000000000000000
696 	.quad 0x0400000000080000, 0x0420000002000000
697 	.quad 0x0000001000080100, 0x0020001002000100
698 	.quad 0x0400001000000100, 0x0000000000080000
699 	.quad 0x0000000000000000, 0x0400000000080000
700 	.quad 0x0020001002080100, 0x0400001000000100
/*
 * Table 6: 64 quadword entries.  The code visible here does not use this
 * label; it addresses the table as s6 = .L_s1 + 5*64*8, so its position
 * right after table 5 is what matters.
 */
701 .L_s6:
702 	.quad 0x0200000120000010, 0x0204000020000000
703 	.quad 0x0000040000000000, 0x0204040120000010
704 	.quad 0x0204000020000000, 0x0000000100000010
705 	.quad 0x0204040120000010, 0x0004000000000000
706 	.quad 0x0200040020000000, 0x0004040100000010
707 	.quad 0x0004000000000000, 0x0200000120000010
708 	.quad 0x0004000100000010, 0x0200040020000000
709 	.quad 0x0200000020000000, 0x0000040100000010
710 	.quad 0x0000000000000000, 0x0004000100000010
711 	.quad 0x0200040120000010, 0x0000040000000000
712 	.quad 0x0004040000000000, 0x0200040120000010
713 	.quad 0x0000000100000010, 0x0204000120000010
714 	.quad 0x0204000120000010, 0x0000000000000000
715 	.quad 0x0004040100000010, 0x0204040020000000
716 	.quad 0x0000040100000010, 0x0004040000000000
717 	.quad 0x0204040020000000, 0x0200000020000000
718 	.quad 0x0200040020000000, 0x0000000100000010
719 	.quad 0x0204000120000010, 0x0004040000000000
720 	.quad 0x0204040120000010, 0x0004000000000000
721 	.quad 0x0000040100000010, 0x0200000120000010
722 	.quad 0x0004000000000000, 0x0200040020000000
723 	.quad 0x0200000020000000, 0x0000040100000010
724 	.quad 0x0200000120000010, 0x0204040120000010
725 	.quad 0x0004040000000000, 0x0204000020000000
726 	.quad 0x0004040100000010, 0x0204040020000000
727 	.quad 0x0000000000000000, 0x0204000120000010
728 	.quad 0x0000000100000010, 0x0000040000000000
729 	.quad 0x0204000020000000, 0x0004040100000010
730 	.quad 0x0000040000000000, 0x0004000100000010
731 	.quad 0x0200040120000010, 0x0000000000000000
732 	.quad 0x0204040020000000, 0x0200000020000000
733 	.quad 0x0004000100000010, 0x0200040120000010
/*
 * Table 7: 64 quadword entries.  The code visible here does not use this
 * label; it addresses the table as s7 = .L_s1 + 6*64*8, so its position
 * right after table 6 is what matters.
 */
734 .L_s7:
735 	.quad 0x0002000000200000, 0x2002000004200002
736 	.quad 0x2000000004000802, 0x0000000000000000
737 	.quad 0x0000000000000800, 0x2000000004000802
738 	.quad 0x2002000000200802, 0x0002000004200800
739 	.quad 0x2002000004200802, 0x0002000000200000
740 	.quad 0x0000000000000000, 0x2000000004000002
741 	.quad 0x2000000000000002, 0x0000000004000000
742 	.quad 0x2002000004200002, 0x2000000000000802
743 	.quad 0x0000000004000800, 0x2002000000200802
744 	.quad 0x2002000000200002, 0x0000000004000800
745 	.quad 0x2000000004000002, 0x0002000004200000
746 	.quad 0x0002000004200800, 0x2002000000200002
747 	.quad 0x0002000004200000, 0x0000000000000800
748 	.quad 0x2000000000000802, 0x2002000004200802
749 	.quad 0x0002000000200800, 0x2000000000000002
750 	.quad 0x0000000004000000, 0x0002000000200800
751 	.quad 0x0000000004000000, 0x0002000000200800
752 	.quad 0x0002000000200000, 0x2000000004000802
753 	.quad 0x2000000004000802, 0x2002000004200002
754 	.quad 0x2002000004200002, 0x2000000000000002
755 	.quad 0x2002000000200002, 0x0000000004000000
756 	.quad 0x0000000004000800, 0x0002000000200000
757 	.quad 0x0002000004200800, 0x2000000000000802
758 	.quad 0x2002000000200802, 0x0002000004200800
759 	.quad 0x2000000000000802, 0x2000000004000002
760 	.quad 0x2002000004200802, 0x0002000004200000
761 	.quad 0x0002000000200800, 0x0000000000000000
762 	.quad 0x2000000000000002, 0x2002000004200802
763 	.quad 0x0000000000000000, 0x2002000000200802
764 	.quad 0x0002000004200000, 0x0000000000000800
765 	.quad 0x2000000004000002, 0x0000000004000800
766 	.quad 0x0000000000000800, 0x2002000000200002
/*
 * Table 8: 64 quadword entries.  The code visible here does not use this
 * label; it addresses the table as s8 = .L_s1 + 7*64*8, so its position
 * right after table 7 is what matters.
 */
767 .L_s8:
768 	.quad 0x0100010410001000, 0x0000010000001000
769 	.quad 0x0000000000040000, 0x0100010410041000
770 	.quad 0x0100000010000000, 0x0100010410001000
771 	.quad 0x0000000400000000, 0x0100000010000000
772 	.quad 0x0000000400040000, 0x0100000010040000
773 	.quad 0x0100010410041000, 0x0000010000041000
774 	.quad 0x0100010010041000, 0x0000010400041000
775 	.quad 0x0000010000001000, 0x0000000400000000
776 	.quad 0x0100000010040000, 0x0100000410000000
777 	.quad 0x0100010010001000, 0x0000010400001000
778 	.quad 0x0000010000041000, 0x0000000400040000
779 	.quad 0x0100000410040000, 0x0100010010041000
780 	.quad 0x0000010400001000, 0x0000000000000000
781 	.quad 0x0000000000000000, 0x0100000410040000
782 	.quad 0x0100000410000000, 0x0100010010001000
783 	.quad 0x0000010400041000, 0x0000000000040000
784 	.quad 0x0000010400041000, 0x0000000000040000
785 	.quad 0x0100010010041000, 0x0000010000001000
786 	.quad 0x0000000400000000, 0x0100000410040000
787 	.quad 0x0000010000001000, 0x0000010400041000
788 	.quad 0x0100010010001000, 0x0000000400000000
789 	.quad 0x0100000410000000, 0x0100000010040000
790 	.quad 0x0100000410040000, 0x0100000010000000
791 	.quad 0x0000000000040000, 0x0100010410001000
792 	.quad 0x0000000000000000, 0x0100010410041000
793 	.quad 0x0000000400040000, 0x0100000410000000
794 	.quad 0x0100000010040000, 0x0100010010001000
795 	.quad 0x0100010410001000, 0x0000000000000000
796 	.quad 0x0100010410041000, 0x0000010000041000
797 	.quad 0x0000010000041000, 0x0000010400001000
798 	.quad 0x0000010400001000, 0x0000000400040000
799 	.quad 0x0100000010000000, 0x0100010010041000
800