/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Serpent Cipher 4-way parallel algorithm (i586/SSE2)
 *
 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * Based on crypto/serpent.c by
 *  Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
 *                2003 Herbert Valerio Riedel <hvr@gnu.org>
 */

#include <linux/linkage.h>

.file "serpent-sse2-i586-asm_32.S"
.text

/*
 * Byte offsets of the stack-passed arguments relative to %esp at function
 * entry. The entry points below read them before touching the stack, so
 * offset 0 is the return address pushed by the caller's call instruction.
 */
#define arg_ctx 4
#define arg_dst 8
#define arg_src 12
#define arg_xor 16

/**********************************************************************
  4-way SSE2 serpent
 **********************************************************************/
/* Base pointer for round-key loads; set from arg_ctx(%esp) by each entry point. */
#define CTX %edx

/*
 * Five working registers. At any point four of them carry cipher state and
 * the fifth is scratch; the assignment is permuted from round to round by
 * the argument order at each macro call site.
 */
#define RA %xmm0
#define RB %xmm1
#define RC %xmm2
#define RD %xmm3
#define RE %xmm4

/* Additional temporaries used for broadcast round-key words. */
#define RT0 %xmm5
#define RT1 %xmm6

/* Set to all ones by the entry points; XORing with it is a bitwise NOT. */
#define RNOT %xmm7

/*
 * get_key(i, j, t): load 32-bit word j of round key i, i.e. the dword at
 * byte offset (4*i + j)*4 from CTX, and broadcast it to all four dword
 * lanes of XMM register t.
 */
#define get_key(i, j, t) \
	movd (4*(i)+(j))*4(CTX), t; \
	pshufd $0, t, t;

/*
 * K(x0, x1, x2, x3, x4, i): XOR round key i into the state.
 *   x0 ^= key[i][0], x1 ^= key[i][1], x2 ^= key[i][2], x3 ^= key[i][3]
 * Clobbers x4, RT0 and RT1 (they receive broadcast key words).
 */
#define K(x0, x1, x2, x3, x4, i) \
	get_key(i, 0, x4); \
	get_key(i, 1, RT0); \
	get_key(i, 2, RT1); \
	pxor x4,		x0; \
	pxor RT0,		x1; \
	pxor RT1,		x2; \
	get_key(i, 3, x4); \
	pxor x4,		x3;

/*
 * LK(x0, x1, x2, x3, x4, i): linear mixing of the state followed by XOR
 * with round key i. Each 32-bit rotate-left is built from a
 * pslld/psrld/por triple with x4 holding the copy that is shifted right.
 *
 *   x0 = rol(x0, 13);  x2 = rol(x2, 3);
 *   x1 = rol(x1 ^ x0 ^ x2, 1);
 *   x3 = rol(x3 ^ x2 ^ (x0 << 3), 7);
 *   x0 = rol(x0 ^ x1 ^ x3, 5);
 *   x2 = rol(x2 ^ x3 ^ (x1 << 7), 22);
 *   x0..x3 ^= key[i][0..3]
 *
 * The key XORs for x1/x3 are interleaved with the last two rotations.
 * Clobbers x4 and RT0.
 */
#define LK(x0, x1, x2, x3, x4, i) \
	movdqa x0,		x4; \
	pslld $13,		x0; \
	psrld $(32 - 13),	x4; \
	por x4,			x0; \
	pxor x0,		x1; \
	movdqa x2,		x4; \
	pslld $3,		x2; \
	psrld $(32 - 3),	x4; \
	por x4,			x2; \
	pxor x2,		x1; \
	movdqa x1,		x4; \
	pslld $1,		x1; \
	psrld $(32 - 1),	x4; \
	por x4,			x1; \
	movdqa x0,		x4; \
	pslld $3,		x4; \
	pxor x2,		x3; \
	pxor x4,		x3; \
	movdqa x3,		x4; \
	pslld $7,		x3; \
	psrld $(32 - 7),	x4; \
	por x4,			x3; \
	movdqa x1,		x4; \
	pslld $7,		x4; \
	pxor x1,		x0; \
	pxor x3,		x0; \
	pxor x3,		x2; \
	pxor x4,		x2; \
	movdqa x0,		x4; \
	get_key(i, 1, RT0); \
	pxor RT0,		x1; \
	get_key(i, 3, RT0); \
	pxor RT0,		x3; \
	pslld $5,		x0; \
	psrld $(32 - 5),	x4; \
	por x4,			x0; \
	movdqa x2,		x4; \
	pslld $22,		x2; \
	psrld $(32 - 22),	x4; \
	por x4,			x2; \
	get_key(i, 0, RT0); \
	pxor RT0,		x0; \
	get_key(i, 2, RT0); \
	pxor RT0,		x2;

/*
 * KL(x0, x1, x2, x3, x4, i): XOR round key i into the state (via K), then
 * run the steps of LK's mixing in reverse order using rotate-right, each
 * built from a psrld/pslld/por triple.
 *
 *   x0..x3 ^= key[i][0..3]
 *   x0 = ror(x0, 5);   x2 = ror(x2, 22);
 *   x2 ^= x3 ^ (x1 << 7);   x0 ^= x3 ^ x1;
 *   x1 = ror(x1, 1);   x3 = ror(x3, 7);
 *   x1 ^= x0 ^ x2;     x3 ^= (x0 << 3) ^ x2;
 *   x0 = ror(x0, 13);  x2 = ror(x2, 3);
 *
 * Clobbers x4, RT0 and RT1.
 */
#define KL(x0, x1, x2, x3, x4, i) \
	K(x0, x1, x2, x3, x4, i); \
	movdqa x0,		x4; \
	psrld $5,		x0; \
	pslld $(32 - 5),	x4; \
	por x4,			x0; \
	movdqa x2,		x4; \
	psrld $22,		x2; \
	pslld $(32 - 22),	x4; \
	por x4,			x2; \
	pxor x3,		x2; \
	pxor x3,		x0; \
	movdqa x1,		x4; \
	pslld $7,		x4; \
	pxor x1,		x0; \
	pxor x4,		x2; \
	movdqa x1,		x4; \
	psrld $1,		x1; \
	pslld $(32 - 1),	x4; \
	por x4,			x1; \
	movdqa x3,		x4; \
	psrld $7,		x3; \
	pslld $(32 - 7),	x4; \
	por x4,			x3; \
	pxor x0,		x1; \
	movdqa x0,		x4; \
	pslld $3,		x4; \
	pxor x4,		x3; \
	movdqa x0,		x4; \
	psrld $13,		x0; \
	pslld $(32 - 13),	x4; \
	por x4,			x0; \
	pxor x2,		x1; \
	pxor x2,		x3; \
	movdqa x2,		x4; \
	psrld $3,		x2; \
	pslld $(32 - 3),	x4; \
	por x4,			x2;

/*
 * S0: Serpent S-box 0 as a sequence of bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The encryption rounds in this file continue with the
 * state in (x2, x1, x3, x0) and reuse x4 as scratch.
 */
#define S0(x0, x1, x2, x3, x4) \
	movdqa x3,		x4; \
	por x0,			x3; \
	pxor x4,		x0; \
	pxor x2,		x4; \
	pxor RNOT,		x4; \
	pxor x1,		x3; \
	pand x0,		x1; \
	pxor x4,		x1; \
	pxor x0,		x2; \
	pxor x3,		x0; \
	por x0,			x4; \
	pxor x2,		x0; \
	pand x1,		x2; \
	pxor x2,		x3; \
	pxor RNOT,		x1; \
	pxor x4,		x2; \
	pxor x2,		x1;

/*
 * S1: Serpent S-box 1 as a sequence of bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The encryption rounds in this file continue with the
 * state in (x4, x2, x3, x0) and reuse x1 as scratch.
 */
#define S1(x0, x1, x2, x3, x4) \
	movdqa x1,		x4; \
	pxor x0,		x1; \
	pxor x3,		x0; \
	pxor RNOT,		x3; \
	pand x1,		x4; \
	por x1,			x0; \
	pxor x2,		x3; \
	pxor x3,		x0; \
	pxor x3,		x1; \
	pxor x4,		x3; \
	por x4,			x1; \
	pxor x2,		x4; \
	pand x0,		x2; \
	pxor x1,		x2; \
	por x0,			x1; \
	pxor RNOT,		x0; \
	pxor x2,		x0; \
	pxor x1,		x4;

/*
 * S2: Serpent S-box 2 as a sequence of bitwise operations on x0..x3.
 * x4 is written before it is read; XOR with RNOT performs a NOT.
 * The encryption rounds in this file continue with the state in
 * (x4, x1, x0, x3) and reuse x2 as scratch.
 */
#define S2(x0, x1, x2, x3, x4) \
	pxor RNOT,		x3; \
	pxor x0,		x1; \
	movdqa x0,		x4; \
	pand x2,		x0; \
	pxor x3,		x0; \
	por x4,			x3; \
	pxor x1,		x2; \
	pxor x1,		x3; \
	pand x0,		x1; \
	pxor x2,		x0; \
	pand x3,		x2; \
	por x1,			x3; \
	pxor RNOT,		x0; \
	pxor x0,		x3; \
	pxor x0,		x4; \
	pxor x2,		x0; \
	por x2,			x1;

/*
 * S3: Serpent S-box 3 as a sequence of bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first).
 * The encryption rounds in this file continue with the state in
 * (x3, x4, x1, x0) and reuse x2 as scratch.
 */
#define S3(x0, x1, x2, x3, x4) \
	movdqa x1,		x4; \
	pxor x3,		x1; \
	por x0,			x3; \
	pand x0,		x4; \
	pxor x2,		x0; \
	pxor x1,		x2; \
	pand x3,		x1; \
	pxor x3,		x2; \
	por x4,			x0; \
	pxor x3,		x4; \
	pxor x0,		x1; \
	pand x3,		x0; \
	pand x4,		x3; \
	pxor x2,		x3; \
	por x1,			x4; \
	pand x1,		x2; \
	pxor x3,		x4; \
	pxor x3,		x0; \
	pxor x2,		x3;

/*
 * S4: Serpent S-box 4 as a sequence of bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The encryption rounds in this file continue with the
 * state in (x1, x2, x3, x4) and reuse x0 as scratch.
 */
#define S4(x0, x1, x2, x3, x4) \
	movdqa x3,		x4; \
	pand x0,		x3; \
	pxor x4,		x0; \
	pxor x2,		x3; \
	por x4,			x2; \
	pxor x1,		x0; \
	pxor x3,		x4; \
	por x0,			x2; \
	pxor x1,		x2; \
	pand x0,		x1; \
	pxor x4,		x1; \
	pand x2,		x4; \
	pxor x3,		x2; \
	pxor x0,		x4; \
	por x1,			x3; \
	pxor RNOT,		x1; \
	pxor x0,		x3;

/*
 * S5: Serpent S-box 5 as a sequence of bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The encryption rounds in this file continue with the
 * state in (x4, x0, x1, x3) and reuse x2 as scratch.
 */
#define S5(x0, x1, x2, x3, x4) \
	movdqa x1,		x4; \
	por x0,			x1; \
	pxor x1,		x2; \
	pxor RNOT,		x3; \
	pxor x0,		x4; \
	pxor x2,		x0; \
	pand x4,		x1; \
	por x3,			x4; \
	pxor x0,		x4; \
	pand x3,		x0; \
	pxor x3,		x1; \
	pxor x2,		x3; \
	pxor x1,		x0; \
	pand x4,		x2; \
	pxor x2,		x1; \
	pand x0,		x2; \
	pxor x2,		x3;

/*
 * S6: Serpent S-box 6 as a sequence of bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The encryption rounds in this file continue with the
 * state in (x2, x4, x1, x3) and reuse x0 as scratch.
 */
#define S6(x0, x1, x2, x3, x4) \
	movdqa x1,		x4; \
	pxor x0,		x3; \
	pxor x2,		x1; \
	pxor x0,		x2; \
	pand x3,		x0; \
	por x3,			x1; \
	pxor RNOT,		x4; \
	pxor x1,		x0; \
	pxor x2,		x1; \
	pxor x4,		x3; \
	pxor x0,		x4; \
	pand x0,		x2; \
	pxor x1,		x4; \
	pxor x3,		x2; \
	pand x1,		x3; \
	pxor x0,		x3; \
	pxor x2,		x1;

/*
 * S7: Serpent S-box 7 as a sequence of bitwise operations on x0..x3.
 * x4 is written before it is read; XOR with RNOT performs a NOT.
 * The encryption rounds in this file continue with the state in
 * (x4, x2, x3, x0) and reuse x1 as scratch.
 */
#define S7(x0, x1, x2, x3, x4) \
	pxor RNOT,		x1; \
	movdqa x1,		x4; \
	pxor RNOT,		x0; \
	pand x2,		x1; \
	pxor x3,		x1; \
	por x4,			x3; \
	pxor x2,		x4; \
	pxor x3,		x2; \
	pxor x0,		x3; \
	por x1,			x0; \
	pand x0,		x2; \
	pxor x4,		x0; \
	pxor x3,		x4; \
	pand x0,		x3; \
	pxor x1,		x4; \
	pxor x4,		x2; \
	pxor x1,		x3; \
	por x0,			x4; \
	pxor x1,		x4;

/*
 * SI0: inverse of Serpent S-box 0 as bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The decryption rounds in this file continue with the
 * state in (x2, x4, x1, x0) and reuse x3 as scratch.
 */
#define SI0(x0, x1, x2, x3, x4) \
	movdqa x3,		x4; \
	pxor x0,		x1; \
	por x1,			x3; \
	pxor x1,		x4; \
	pxor RNOT,		x0; \
	pxor x3,		x2; \
	pxor x0,		x3; \
	pand x1,		x0; \
	pxor x2,		x0; \
	pand x3,		x2; \
	pxor x4,		x3; \
	pxor x3,		x2; \
	pxor x3,		x1; \
	pand x0,		x3; \
	pxor x0,		x1; \
	pxor x2,		x0; \
	pxor x3,		x4;

/*
 * SI1: inverse of Serpent S-box 1 as bitwise operations on x0..x3.
 * x4 is written before it is read; XOR with RNOT performs a NOT.
 * The decryption rounds in this file continue with the state in
 * (x4, x1, x2, x3) and reuse x0 as scratch.
 */
#define SI1(x0, x1, x2, x3, x4) \
	pxor x3,		x1; \
	movdqa x0,		x4; \
	pxor x2,		x0; \
	pxor RNOT,		x2; \
	por x1,			x4; \
	pxor x3,		x4; \
	pand x1,		x3; \
	pxor x2,		x1; \
	pand x4,		x2; \
	pxor x1,		x4; \
	por x3,			x1; \
	pxor x0,		x3; \
	pxor x0,		x2; \
	por x4,			x0; \
	pxor x4,		x2; \
	pxor x0,		x1; \
	pxor x1,		x4;

/*
 * SI2: inverse of Serpent S-box 2 as bitwise operations on x0..x3.
 * x4 is written before it is read; XOR with RNOT performs a NOT.
 * The decryption rounds in this file continue with the state in
 * (x1, x4, x3, x2) and reuse x0 as scratch.
 */
#define SI2(x0, x1, x2, x3, x4) \
	pxor x1,		x2; \
	movdqa x3,		x4; \
	pxor RNOT,		x3; \
	por x2,			x3; \
	pxor x4,		x2; \
	pxor x0,		x4; \
	pxor x1,		x3; \
	por x2,			x1; \
	pxor x0,		x2; \
	pxor x4,		x1; \
	por x3,			x4; \
	pxor x3,		x2; \
	pxor x2,		x4; \
	pand x1,		x2; \
	pxor x3,		x2; \
	pxor x4,		x3; \
	pxor x0,		x4;

/*
 * SI3: inverse of Serpent S-box 3 as bitwise operations on x0..x3.
 * x4 is written before it is read.
 * The decryption rounds in this file continue with the state in
 * (x2, x0, x4, x3) and reuse x1 as scratch.
 */
#define SI3(x0, x1, x2, x3, x4) \
	pxor x1,		x2; \
	movdqa x1,		x4; \
	pand x2,		x1; \
	pxor x0,		x1; \
	por x4,			x0; \
	pxor x3,		x4; \
	pxor x3,		x0; \
	por x1,			x3; \
	pxor x2,		x1; \
	pxor x3,		x1; \
	pxor x2,		x0; \
	pxor x3,		x2; \
	pand x1,		x3; \
	pxor x0,		x1; \
	pand x2,		x0; \
	pxor x3,		x4; \
	pxor x0,		x3; \
	pxor x1,		x0;

/*
 * SI4: inverse of Serpent S-box 4 as bitwise operations on x0..x3.
 * x4 is written before it is read; XOR with RNOT performs a NOT.
 * The decryption rounds in this file continue with the state in
 * (x0, x2, x4, x3) and reuse x1 as scratch.
 */
#define SI4(x0, x1, x2, x3, x4) \
	pxor x3,		x2; \
	movdqa x0,		x4; \
	pand x1,		x0; \
	pxor x2,		x0; \
	por x3,			x2; \
	pxor RNOT,		x4; \
	pxor x0,		x1; \
	pxor x2,		x0; \
	pand x4,		x2; \
	pxor x0,		x2; \
	por x4,			x0; \
	pxor x3,		x0; \
	pand x2,		x3; \
	pxor x3,		x4; \
	pxor x1,		x3; \
	pand x0,		x1; \
	pxor x1,		x4; \
	pxor x3,		x0;

/*
 * SI5: inverse of Serpent S-box 5 as bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The decryption rounds in this file continue with the
 * state in (x1, x4, x0, x2) and reuse x3 as scratch.
 */
#define SI5(x0, x1, x2, x3, x4) \
	movdqa x1,		x4; \
	por x2,			x1; \
	pxor x4,		x2; \
	pxor x3,		x1; \
	pand x4,		x3; \
	pxor x3,		x2; \
	por x0,			x3; \
	pxor RNOT,		x0; \
	pxor x2,		x3; \
	por x0,			x2; \
	pxor x1,		x4; \
	pxor x4,		x2; \
	pand x0,		x4; \
	pxor x1,		x0; \
	pxor x3,		x1; \
	pand x2,		x0; \
	pxor x3,		x2; \
	pxor x2,		x0; \
	pxor x4,		x2; \
	pxor x3,		x4;

/*
 * SI6: inverse of Serpent S-box 6 as bitwise operations on x0..x3.
 * x4 is written before it is read; XOR with RNOT performs a NOT.
 * The decryption rounds in this file continue with the state in
 * (x2, x4, x3, x0) and reuse x1 as scratch.
 */
#define SI6(x0, x1, x2, x3, x4) \
	pxor x2,		x0; \
	movdqa x0,		x4; \
	pand x3,		x0; \
	pxor x3,		x2; \
	pxor x2,		x0; \
	pxor x1,		x3; \
	por x4,			x2; \
	pxor x3,		x2; \
	pand x0,		x3; \
	pxor RNOT,		x0; \
	pxor x1,		x3; \
	pand x2,		x1; \
	pxor x0,		x4; \
	pxor x4,		x3; \
	pxor x2,		x4; \
	pxor x1,		x0; \
	pxor x0,		x2;

/*
 * SI7: inverse of Serpent S-box 7 as bitwise operations on x0..x3.
 * x4's incoming value is ignored (it is overwritten first); XOR with RNOT
 * performs a NOT. The decryption rounds in this file continue with the
 * state in (x1, x3, x0, x4) and reuse x2 as scratch.
 */
#define SI7(x0, x1, x2, x3, x4) \
	movdqa x3,		x4; \
	pand x0,		x3; \
	pxor x2,		x0; \
	por x4,			x2; \
	pxor x1,		x4; \
	pxor RNOT,		x0; \
	por x3,			x1; \
	pxor x0,		x4; \
	pand x2,		x0; \
	pxor x1,		x0; \
	pand x2,		x1; \
	pxor x2,		x3; \
	pxor x3,		x4; \
	pand x3,		x2; \
	por x0,			x3; \
	pxor x4,		x1; \
	pxor x4,		x3; \
	pand x0,		x4; \
	pxor x2,		x4;

/*
 * transpose_4x4(x0, x1, x2, x3, t0, t1, t2): treat x0..x3 as the rows of a
 * 4x4 matrix of 32-bit elements and transpose it in place, so that on exit
 * x<k> holds element k of each of the four original rows.
 * Clobbers t1 and t2; t0 is accepted but not used by this macro.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	movdqa x0,		t2; \
	punpckldq x1,		x0; \
	punpckhdq x1,		t2; \
	movdqa x2,		t1; \
	punpckhdq x3,		x2; \
	punpckldq x3,		t1; \
	movdqa x0,		x1; \
	punpcklqdq t1,		x0; \
	punpckhqdq t1,		x1; \
	movdqa t2,		x3; \
	punpcklqdq x2,		t2; \
	punpckhqdq x2,		x3; \
	movdqa t2,		x2;

/*
 * read_blocks(in, x0..x3, t0, t1, t2): load four consecutive 16-byte blocks
 * from the address in register `in` (unaligned loads) and transpose them so
 * that x<k> holds 32-bit word k of every block. Clobbers t1 and t2.
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
	movdqu (0*4*4)(in),	x0; \
	movdqu (1*4*4)(in),	x1; \
	movdqu (2*4*4)(in),	x2; \
	movdqu (3*4*4)(in),	x3; \
	\
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

/*
 * write_blocks(out, x0..x3, t0, t1, t2): transpose the word-sliced state in
 * x0..x3 back into four 16-byte blocks and store them consecutively at the
 * address in register `out` (unaligned stores). Clobbers t1 and t2.
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu x0, (0*4*4)(out); \
	movdqu x1, (1*4*4)(out); \
	movdqu x2, (2*4*4)(out); \
	movdqu x3, (3*4*4)(out);

/*
 * xor_blocks(out, x0..x3, t0, t1, t2): like write_blocks, but each 16-byte
 * block is XORed with the 16 bytes already present at its destination
 * before being stored. Clobbers t0, t1 and t2.
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu (0*4*4)(out),	t0; \
	pxor t0,		x0; \
	movdqu x0,		(0*4*4)(out); \
	movdqu (1*4*4)(out),	t0; \
	pxor t0,		x1; \
	movdqu x1,		(1*4*4)(out); \
	movdqu (2*4*4)(out),	t0; \
	pxor t0,		x2; \
	movdqu x2,		(2*4*4)(out); \
	movdqu (3*4*4)(out),	t0; \
	pxor t0,		x3; \
	movdqu x3,		(3*4*4)(out);

/*
 * Encrypt four 16-byte blocks in parallel.
 *
 * Reads 64 bytes from src, applies the initial key XOR, 31 rounds of
 * S-box + LK and a final S-box + key XOR (round keys 0..32 taken from ctx),
 * then either stores the result to dst or XORs it into the 64 bytes
 * already at dst, depending on the xor argument.
 *
 * Uses %eax, %edx, %xmm0-%xmm7 and the flags; does not adjust %esp.
 */
SYM_FUNC_START(__serpent_enc_blk_4way)
	/* input:
	 *	arg_ctx(%esp): ctx, CTX
	 *	arg_dst(%esp): dst
	 *	arg_src(%esp): src
	 *	arg_xor(%esp): bool, if true: xor output
	 */

	/* RNOT = all ones */
	pcmpeqd RNOT, RNOT;

	movl arg_ctx(%esp), CTX;

	movl arg_src(%esp), %eax;
	read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	/*
	 * The register order passed to each macro follows the permutation
	 * the preceding S-box leaves the state in.
	 */
					 K(RA, RB, RC, RD, RE, 0);
	S0(RA, RB, RC, RD, RE);		LK(RC, RB, RD, RA, RE, 1);
	S1(RC, RB, RD, RA, RE);		LK(RE, RD, RA, RC, RB, 2);
	S2(RE, RD, RA, RC, RB);		LK(RB, RD, RE, RC, RA, 3);
	S3(RB, RD, RE, RC, RA);		LK(RC, RA, RD, RB, RE, 4);
	S4(RC, RA, RD, RB, RE);		LK(RA, RD, RB, RE, RC, 5);
	S5(RA, RD, RB, RE, RC);		LK(RC, RA, RD, RE, RB, 6);
	S6(RC, RA, RD, RE, RB);		LK(RD, RB, RA, RE, RC, 7);
	S7(RD, RB, RA, RE, RC);		LK(RC, RA, RE, RD, RB, 8);
	S0(RC, RA, RE, RD, RB);		LK(RE, RA, RD, RC, RB, 9);
	S1(RE, RA, RD, RC, RB);		LK(RB, RD, RC, RE, RA, 10);
	S2(RB, RD, RC, RE, RA);		LK(RA, RD, RB, RE, RC, 11);
	S3(RA, RD, RB, RE, RC);		LK(RE, RC, RD, RA, RB, 12);
	S4(RE, RC, RD, RA, RB);		LK(RC, RD, RA, RB, RE, 13);
	S5(RC, RD, RA, RB, RE);		LK(RE, RC, RD, RB, RA, 14);
	S6(RE, RC, RD, RB, RA);		LK(RD, RA, RC, RB, RE, 15);
	S7(RD, RA, RC, RB, RE);		LK(RE, RC, RB, RD, RA, 16);
	S0(RE, RC, RB, RD, RA);		LK(RB, RC, RD, RE, RA, 17);
	S1(RB, RC, RD, RE, RA);		LK(RA, RD, RE, RB, RC, 18);
	S2(RA, RD, RE, RB, RC);		LK(RC, RD, RA, RB, RE, 19);
	S3(RC, RD, RA, RB, RE);		LK(RB, RE, RD, RC, RA, 20);
	S4(RB, RE, RD, RC, RA);		LK(RE, RD, RC, RA, RB, 21);
	S5(RE, RD, RC, RA, RB);		LK(RB, RE, RD, RA, RC, 22);
	S6(RB, RE, RD, RA, RC);		LK(RD, RC, RE, RA, RB, 23);
	S7(RD, RC, RE, RA, RB);		LK(RB, RE, RA, RD, RC, 24);
	S0(RB, RE, RA, RD, RC);		LK(RA, RE, RD, RB, RC, 25);
	S1(RA, RE, RD, RB, RC);		LK(RC, RD, RB, RA, RE, 26);
	S2(RC, RD, RB, RA, RE);		LK(RE, RD, RC, RA, RB, 27);
	S3(RE, RD, RC, RA, RB);		LK(RA, RB, RD, RE, RC, 28);
	S4(RA, RB, RD, RE, RC);		LK(RB, RD, RE, RC, RA, 29);
	S5(RB, RD, RE, RC, RA);		LK(RA, RB, RD, RC, RE, 30);
	S6(RA, RB, RD, RC, RE);		LK(RD, RE, RB, RC, RA, 31);
	S7(RD, RE, RB, RC, RA);		 K(RA, RB, RC, RD, RE, 32);

	movl arg_dst(%esp), %eax;

	/* Only the low byte of the xor argument is tested. */
	cmpb $0, arg_xor(%esp);
	jnz .L__enc_xor4;

	write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	RET;

.L__enc_xor4:
	xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	RET;
SYM_FUNC_END(__serpent_enc_blk_4way)

/*
 * Decrypt four 16-byte blocks in parallel.
 *
 * Reads 64 bytes from src, XORs in round key 32, then applies the inverse
 * S-boxes SI7..SI0 interleaved with KL for round keys 31 down to 1 and a
 * final key XOR with round key 0, and stores the 64-byte result to dst.
 *
 * Uses %eax, %edx and %xmm0-%xmm7; does not adjust %esp.
 */
SYM_FUNC_START(serpent_dec_blk_4way)
	/* input:
	 *	arg_ctx(%esp): ctx, CTX
	 *	arg_dst(%esp): dst
	 *	arg_src(%esp): src
	 */

	/* RNOT = all ones */
	pcmpeqd RNOT, RNOT;

	movl arg_ctx(%esp), CTX;

	movl arg_src(%esp), %eax;
	read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	/*
	 * The register order passed to each macro follows the permutation
	 * the preceding inverse S-box leaves the state in.
	 */
					 K(RA, RB, RC, RD, RE, 32);
	SI7(RA, RB, RC, RD, RE);	KL(RB, RD, RA, RE, RC, 31);
	SI6(RB, RD, RA, RE, RC);	KL(RA, RC, RE, RB, RD, 30);
	SI5(RA, RC, RE, RB, RD);	KL(RC, RD, RA, RE, RB, 29);
	SI4(RC, RD, RA, RE, RB);	KL(RC, RA, RB, RE, RD, 28);
	SI3(RC, RA, RB, RE, RD);	KL(RB, RC, RD, RE, RA, 27);
	SI2(RB, RC, RD, RE, RA);	KL(RC, RA, RE, RD, RB, 26);
	SI1(RC, RA, RE, RD, RB);	KL(RB, RA, RE, RD, RC, 25);
	SI0(RB, RA, RE, RD, RC);	KL(RE, RC, RA, RB, RD, 24);
	SI7(RE, RC, RA, RB, RD);	KL(RC, RB, RE, RD, RA, 23);
	SI6(RC, RB, RE, RD, RA);	KL(RE, RA, RD, RC, RB, 22);
	SI5(RE, RA, RD, RC, RB);	KL(RA, RB, RE, RD, RC, 21);
	SI4(RA, RB, RE, RD, RC);	KL(RA, RE, RC, RD, RB, 20);
	SI3(RA, RE, RC, RD, RB);	KL(RC, RA, RB, RD, RE, 19);
	SI2(RC, RA, RB, RD, RE);	KL(RA, RE, RD, RB, RC, 18);
	SI1(RA, RE, RD, RB, RC);	KL(RC, RE, RD, RB, RA, 17);
	SI0(RC, RE, RD, RB, RA);	KL(RD, RA, RE, RC, RB, 16);
	SI7(RD, RA, RE, RC, RB);	KL(RA, RC, RD, RB, RE, 15);
	SI6(RA, RC, RD, RB, RE);	KL(RD, RE, RB, RA, RC, 14);
	SI5(RD, RE, RB, RA, RC);	KL(RE, RC, RD, RB, RA, 13);
	SI4(RE, RC, RD, RB, RA);	KL(RE, RD, RA, RB, RC, 12);
	SI3(RE, RD, RA, RB, RC);	KL(RA, RE, RC, RB, RD, 11);
	SI2(RA, RE, RC, RB, RD);	KL(RE, RD, RB, RC, RA, 10);
	SI1(RE, RD, RB, RC, RA);	KL(RA, RD, RB, RC, RE, 9);
	SI0(RA, RD, RB, RC, RE);	KL(RB, RE, RD, RA, RC, 8);
	SI7(RB, RE, RD, RA, RC);	KL(RE, RA, RB, RC, RD, 7);
	SI6(RE, RA, RB, RC, RD);	KL(RB, RD, RC, RE, RA, 6);
	SI5(RB, RD, RC, RE, RA);	KL(RD, RA, RB, RC, RE, 5);
	SI4(RD, RA, RB, RC, RE);	KL(RD, RB, RE, RC, RA, 4);
	SI3(RD, RB, RE, RC, RA);	KL(RE, RD, RA, RC, RB, 3);
	SI2(RE, RD, RA, RC, RB);	KL(RD, RB, RC, RA, RE, 2);
	SI1(RD, RB, RC, RA, RE);	KL(RE, RB, RC, RA, RD, 1);
	SI0(RE, RB, RC, RA, RD);	 K(RC, RD, RB, RE, RA, 0);

	movl arg_dst(%esp), %eax;
	write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);

	RET;
SYM_FUNC_END(serpent_dec_blk_4way)