/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * x86_64/AVX2 assembler optimized version of Serpent
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * Based on AVX assembler implementation of Serpent by:
 *  Copyright © 2012 Johannes Goetzfried
 *      <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 */

/*
 * Syntax: GNU as, AT&T operand order (sources first, destination last),
 * run through the C preprocessor (#include / #define are used throughout).
 */

#include <linux/linkage.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx2.S"

.file "serpent-avx2-asm_64.S"

/*
 * 16-byte constants.  Each one lives in its own mergeable read-only
 * section ("aM", entity size 16) and is 16-byte aligned.
 */

/*
 * Byte indices 15..0, i.e. a "reverse the 16 bytes" pattern.
 * NOTE(review): presumably used as a vpshufb control mask; confirm against
 * the users of this label.
 */
.section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

/*
 * NOTE(review): going by the label names, the next two constants are masks
 * for the XTS tweak update (GF(2^128) multiply combined with a left shift).
 * Their exact use must be confirmed against the code that references them.
 */
.section	.rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask_0:
	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

.section	.rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask_1:
	.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0

.text

/* Base pointer that get_key() indexes to fetch key words ("ctx"). */
#define CTX %rdi

/*
 * RNOT: all-ones vector; the cipher routines set it up with
 *       "vpcmpeqd RNOT, RNOT, RNOT", so "vpxor RNOT, x, x" is a bitwise NOT.
 * tp:   scratch register shared by the two halves of every S-box macro.
 */
#define RNOT %ymm0
#define tp  %ymm1

/*
 * Cipher state: five working words (A..E), each kept twice.  Suffix 1 and
 * suffix 2 name two independent register sets that are processed with the
 * same instruction sequence.
 */
#define RA1 %ymm2
#define RA2 %ymm3
#define RB1 %ymm4
#define RB2 %ymm5
#define RC1 %ymm6
#define RC2 %ymm7
#define RD1 %ymm8
#define RD2 %ymm9
#define RE1 %ymm10
#define RE2 %ymm11

/* Key words broadcast by get_key(); also used as transpose temporaries. */
#define RK0 %ymm12
#define RK1 %ymm13
#define RK2 %ymm14
#define RK3 %ymm15

/* 128-bit views of RK0..RK3. */
#define RK0x %xmm12
#define RK1x %xmm13
#define RK2x %xmm14
#define RK3x %xmm15

/*
 * S-box macros
 * ------------
 * Every S-box (S0..S7) and inverse S-box (SI0..SI7) is a fixed sequence of
 * vpand/vpor/vpxor instructions, split into two halves NAME_1 and NAME_2 so
 * that callers can interleave other instructions between the halves.
 *
 * Contract shared by all of them:
 *   - x0..x3 are read and overwritten; x4 is a fifth working register that
 *     is always written before it is read, so it needs no defined value on
 *     entry.
 *   - tp is written by NAME_1 and, for several S-boxes, read by NAME_2.
 *     NAME_2 must therefore follow NAME_1 for the same register set before
 *     any other NAME_1 is executed.
 *   - RNOT must hold all-ones (vpxor with RNOT implements NOT).
 *   - Which of x0..x4 carry the result differs per S-box; callers rename
 *     the registers for the next step accordingly.
 *
 * The instruction order inside each half is significant; do not reorder.
 */

#define S0_1(x0, x1, x2, x3, x4) \
	vpor	x0, x3, tp; \
	vpxor	x3, x0, x0; \
	vpxor	x2, x3, x4; \
	vpxor	RNOT, x4, x4; \
	vpxor	x1, tp, x3; \
	vpand	x0, x1, x1; \
	vpxor	x4, x1, x1; \
	vpxor	x0, x2, x2;
#define S0_2(x0, x1, x2, x3, x4) \
	vpxor	x3, x0, x0; \
	vpor	x0, x4, x4; \
	vpxor	x2, x0, x0; \
	vpand	x1, x2, x2; \
	vpxor	x2, x3, x3; \
	vpxor	RNOT, x1, x1; \
	vpxor	x4, x2, x2; \
	vpxor	x2, x1, x1;

#define S1_1(x0, x1, x2, x3, x4) \
	vpxor	x0, x1, tp; \
	vpxor	x3, x0, x0; \
	vpxor	RNOT, x3, x3; \
	vpand	tp, x1, x4; \
	vpor	tp, x0, x0; \
	vpxor	x2, x3, x3; \
	vpxor	x3, x0, x0; \
	vpxor	x3, tp, x1;
#define S1_2(x0, x1, x2, x3, x4) \
	vpxor	x4, x3, x3; \
	vpor	x4, x1, x1; \
	vpxor	x2, x4, x4; \
	vpand	x0, x2, x2; \
	vpxor	x1, x2, x2; \
	vpor	x0, x1, x1; \
	vpxor	RNOT, x0, x0; \
	vpxor	x2, x0, x0; \
	vpxor	x1, x4, x4;

#define S2_1(x0, x1, x2, x3, x4) \
	vpxor	RNOT, x3, x3; \
	vpxor	x0, x1, x1; \
	vpand	x2, x0, tp; \
	vpxor	x3, tp, tp; \
	vpor	x0, x3, x3; \
	vpxor	x1, x2, x2; \
	vpxor	x1, x3, x3; \
	vpand	tp, x1, x1;
#define S2_2(x0, x1, x2, x3, x4) \
	vpxor	x2, tp, tp; \
	vpand	x3, x2, x2; \
	vpor	x1, x3, x3; \
	vpxor	RNOT, tp, tp; \
	vpxor	tp, x3, x3; \
	vpxor	tp, x0, x4; \
	vpxor	x2, tp, x0; \
	vpor	x2, x1, x1;

#define S3_1(x0, x1, x2, x3, x4) \
	vpxor	x3, x1, tp; \
	vpor	x0, x3, x3; \
	vpand	x0, x1, x4; \
	vpxor	x2, x0, x0; \
	vpxor	tp, x2, x2; \
	vpand	x3, tp, x1; \
	vpxor	x3, x2, x2; \
	vpor	x4, x0, x0; \
	vpxor	x3, x4, x4;
#define S3_2(x0, x1, x2, x3, x4) \
	vpxor	x0, x1, x1; \
	vpand	x3, x0, x0; \
	vpand	x4, x3, x3; \
	vpxor	x2, x3, x3; \
	vpor	x1, x4, x4; \
	vpand	x1, x2, x2; \
	vpxor	x3, x4, x4; \
	vpxor	x3, x0, x0; \
	vpxor	x2, x3, x3;

#define S4_1(x0, x1, x2, x3, x4) \
	vpand	x0, x3, tp; \
	vpxor	x3, x0, x0; \
	vpxor	x2, tp, tp; \
	vpor	x3, x2, x2; \
	vpxor	x1, x0, x0; \
	vpxor	tp, x3, x4; \
	vpor	x0, x2, x2; \
	vpxor	x1, x2, x2;
#define S4_2(x0, x1, x2, x3, x4) \
	vpand	x0, x1, x1; \
	vpxor	x4, x1, x1; \
	vpand	x2, x4, x4; \
	vpxor	tp, x2, x2; \
	vpxor	x0, x4, x4; \
	vpor	x1, tp, x3; \
	vpxor	RNOT, x1, x1; \
	vpxor	x0, x3, x3;

#define S5_1(x0, x1, x2, x3, x4) \
	vpor	x0, x1, tp; \
	vpxor	tp, x2, x2; \
	vpxor	RNOT, x3, x3; \
	vpxor	x0, x1, x4; \
	vpxor	x2, x0, x0; \
	vpand	x4, tp, x1; \
	vpor	x3, x4, x4; \
	vpxor	x0, x4, x4;
#define S5_2(x0, x1, x2, x3, x4) \
	vpand	x3, x0, x0; \
	vpxor	x3, x1, x1; \
	vpxor	x2, x3, x3; \
	vpxor	x1, x0, x0; \
	vpand	x4, x2, x2; \
	vpxor	x2, x1, x1; \
	vpand	x0, x2, x2; \
	vpxor	x2, x3, x3;

#define S6_1(x0, x1, x2, x3, x4) \
	vpxor	x0, x3, x3; \
	vpxor	x2, x1, tp; \
	vpxor	x0, x2, x2; \
	vpand	x3, x0, x0; \
	vpor	x3, tp, tp; \
	vpxor	RNOT, x1, x4; \
	vpxor	tp, x0, x0; \
	vpxor	x2, tp, x1;
#define S6_2(x0, x1, x2, x3, x4) \
	vpxor	x4, x3, x3; \
	vpxor	x0, x4, x4; \
	vpand	x0, x2, x2; \
	vpxor	x1, x4, x4; \
	vpxor	x3, x2, x2; \
	vpand	x1, x3, x3; \
	vpxor	x0, x3, x3; \
	vpxor	x2, x1, x1;

#define S7_1(x0, x1, x2, x3, x4) \
	vpxor	RNOT, x1, tp; \
	vpxor	RNOT, x0, x0; \
	vpand	x2, tp, x1; \
	vpxor	x3, x1, x1; \
	vpor	tp, x3, x3; \
	vpxor	x2, tp, x4; \
	vpxor	x3, x2, x2; \
	vpxor	x0, x3, x3; \
	vpor	x1, x0, x0;
#define S7_2(x0, x1, x2, x3, x4) \
	vpand	x0, x2, x2; \
	vpxor	x4, x0, x0; \
	vpxor	x3, x4, x4; \
	vpand	x0, x3, x3; \
	vpxor	x1, x4, x4; \
	vpxor	x4, x2, x2; \
	vpxor	x1, x3, x3; \
	vpor	x0, x4, x4; \
	vpxor	x1, x4, x4;

/* Inverse S-boxes (used by the decryption path). */

#define SI0_1(x0, x1, x2, x3, x4) \
	vpxor	x0, x1, x1; \
	vpor	x1, x3, tp; \
	vpxor	x1, x3, x4; \
	vpxor	RNOT, x0, x0; \
	vpxor	tp, x2, x2; \
	vpxor	x0, tp, x3; \
	vpand	x1, x0, x0; \
	vpxor	x2, x0, x0;
#define SI0_2(x0, x1, x2, x3, x4) \
	vpand	x3, x2, x2; \
	vpxor	x4, x3, x3; \
	vpxor	x3, x2, x2; \
	vpxor	x3, x1, x1; \
	vpand	x0, x3, x3; \
	vpxor	x0, x1, x1; \
	vpxor	x2, x0, x0; \
	vpxor	x3, x4, x4;

#define SI1_1(x0, x1, x2, x3, x4) \
	vpxor	x3, x1, x1; \
	vpxor	x2, x0, tp; \
	vpxor	RNOT, x2, x2; \
	vpor	x1, x0, x4; \
	vpxor	x3, x4, x4; \
	vpand	x1, x3, x3; \
	vpxor	x2, x1, x1; \
	vpand	x4, x2, x2;
#define SI1_2(x0, x1, x2, x3, x4) \
	vpxor	x1, x4, x4; \
	vpor	x3, x1, x1; \
	vpxor	tp, x3, x3; \
	vpxor	tp, x2, x2; \
	vpor	x4, tp, x0; \
	vpxor	x4, x2, x2; \
	vpxor	x0, x1, x1; \
	vpxor	x1, x4, x4;

#define SI2_1(x0, x1, x2, x3, x4) \
	vpxor	x1, x2, x2; \
	vpxor	RNOT, x3, tp; \
	vpor	x2, tp, tp; \
	vpxor	x3, x2, x2; \
	vpxor	x0, x3, x4; \
	vpxor	x1, tp, x3; \
	vpor	x2, x1, x1; \
	vpxor	x0, x2, x2;
#define SI2_2(x0, x1, x2, x3, x4) \
	vpxor	x4, x1, x1; \
	vpor	x3, x4, x4; \
	vpxor	x3, x2, x2; \
	vpxor	x2, x4, x4; \
	vpand	x1, x2, x2; \
	vpxor	x3, x2, x2; \
	vpxor	x4, x3, x3; \
	vpxor	x0, x4, x4;

#define SI3_1(x0, x1, x2, x3, x4) \
	vpxor	x1, x2, x2; \
	vpand	x2, x1, tp; \
	vpxor	x0, tp, tp; \
	vpor	x1, x0, x0; \
	vpxor	x3, x1, x4; \
	vpxor	x3, x0, x0; \
	vpor	tp, x3, x3; \
	vpxor	x2, tp, x1;
#define SI3_2(x0, x1, x2, x3, x4) \
	vpxor	x3, x1, x1; \
	vpxor	x2, x0, x0; \
	vpxor	x3, x2, x2; \
	vpand	x1, x3, x3; \
	vpxor	x0, x1, x1; \
	vpand	x2, x0, x0; \
	vpxor	x3, x4, x4; \
	vpxor	x0, x3, x3; \
	vpxor	x1, x0, x0;

#define SI4_1(x0, x1, x2, x3, x4) \
	vpxor	x3, x2, x2; \
	vpand	x1, x0, tp; \
	vpxor	x2, tp, tp; \
	vpor	x3, x2, x2; \
	vpxor	RNOT, x0, x4; \
	vpxor	tp, x1, x1; \
	vpxor	x2, tp, x0; \
	vpand	x4, x2, x2;
#define SI4_2(x0, x1, x2, x3, x4) \
	vpxor	x0, x2, x2; \
	vpor	x4, x0, x0; \
	vpxor	x3, x0, x0; \
	vpand	x2, x3, x3; \
	vpxor	x3, x4, x4; \
	vpxor	x1, x3, x3; \
	vpand	x0, x1, x1; \
	vpxor	x1, x4, x4; \
	vpxor	x3, x0, x0;

#define SI5_1(x0, x1, x2, x3, x4) \
	vpor	x2, x1, tp; \
	vpxor	x1, x2, x2; \
	vpxor	x3, tp, tp; \
	vpand	x1, x3, x3; \
	vpxor	x3, x2, x2; \
	vpor	x0, x3, x3; \
	vpxor	RNOT, x0, x0; \
	vpxor	x2, x3, x3; \
	vpor	x0, x2, x2;
#define SI5_2(x0, x1, x2, x3, x4) \
	vpxor	tp, x1, x4; \
	vpxor	x4, x2, x2; \
	vpand	x0, x4, x4; \
	vpxor	tp, x0, x0; \
	vpxor	x3, tp, x1; \
	vpand	x2, x0, x0; \
	vpxor	x3, x2, x2; \
	vpxor	x2, x0, x0; \
	vpxor	x4, x2, x2; \
	vpxor	x3, x4, x4;

#define SI6_1(x0, x1, x2, x3, x4) \
	vpxor	x2, x0, x0; \
	vpand	x3, x0, tp; \
	vpxor	x3, x2, x2; \
	vpxor	x2, tp, tp; \
	vpxor	x1, x3, x3; \
	vpor	x0, x2, x2; \
	vpxor	x3, x2, x2; \
	vpand	tp, x3, x3;
#define SI6_2(x0, x1, x2, x3, x4) \
	vpxor	RNOT, tp, tp; \
	vpxor	x1, x3, x3; \
	vpand	x2, x1, x1; \
	vpxor	tp, x0, x4; \
	vpxor	x4, x3, x3; \
	vpxor	x2, x4, x4; \
	vpxor	x1, tp, x0; \
	vpxor	x0, x2, x2;

#define SI7_1(x0, x1, x2, x3, x4) \
	vpand	x0, x3, tp; \
	vpxor	x2, x0, x0; \
	vpor	x3, x2, x2; \
	vpxor	x1, x3, x4; \
	vpxor	RNOT, x0, x0; \
	vpor	tp, x1, x1; \
	vpxor	x0, x4, x4; \
	vpand	x2, x0, x0; \
	vpxor	x1, x0, x0;
#define SI7_2(x0, x1, x2, x3, x4) \
	vpand	x2, x1, x1; \
	vpxor	x2, tp, x3; \
	vpxor	x3, x4, x4; \
	vpand	x3, x2, x2; \
	vpor	x0, x3, x3; \
	vpxor	x4, x1, x1; \
	vpxor	x4, x3, x3; \
	vpand	x0, x4, x4; \
	vpxor	x2, x4, x4;

/*
 * get_key(i, j, t): broadcast one 32-bit key word into every dword lane of t.
 * The word is read from byte offset (4*i + j) * 4 relative to CTX, i.e.
 * 16 bytes per index i and 4 bytes per word j (j = 0..3).
 */
#define get_key(i,j,t) \
	vpbroadcastd (4*(i)+(j))*4(CTX), t;

/*
 * Naming used by the macros below: the arguments x0..x4 are register-name
 * stems (RA, RB, ...); "x ## 1" / "x ## 2" paste the suffix that selects the
 * first or second register set, so every step is emitted once per set.
 */

/*
 * K2: key mixing.  Loads the four key words of index i into RK0..RK3 and
 * XORs them into x0..x3 of both register sets.  x4 is unused.
 */
#define K2(x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	get_key(i, 1, RK1); \
	get_key(i, 2, RK2); \
	get_key(i, 3, RK3); \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2;

/*
 * LK2: linear mixing of x0..x3 followed by XOR with key index i
 * (encryption direction).  Per 32-bit lane, with rol = rotate left built
 * from a vpslld/vpsrld/vpor triple:
 *
 *   x0 = rol(x0, 13);  x2 = rol(x2, 3);
 *   x1 ^= x0 ^ x2;     x3 ^= x2 ^ (x0 << 3);
 *   x1 = rol(x1, 1);   x3 = rol(x3, 7);
 *   x0 ^= x1 ^ x3;     x2 ^= x3 ^ (x1 << 7);
 *   x0 = rol(x0, 5);   x2 = rol(x2, 22);
 *   x0..x3 ^= key words 0..3 of index i
 *
 * x4 is scratch.  The get_key() loads are spread between the arithmetic
 * steps; RK0..RK3 are overwritten.
 */
#define LK2(x0, x1, x2, x3, x4, i) \
	/* set 1: x0 = rol(x0, 13); x2 = rol(x2, 3); x1 ^= x0 ^ x2 */ \
	vpslld $13, x0 ## 1, x4 ## 1; \
	vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x2 ## 1, x4 ## 1; \
	vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
	/* set 2: same step */ \
	vpslld $13, x0 ## 2, x4 ## 2; \
	vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x2 ## 2, x4 ## 2; \
	vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
	/* set 1: x1 = rol(x1, 1); x3 ^= x2 ^ (x0 << 3) */ \
	vpslld $1, x1 ## 1, x4 ## 1; \
	vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
	vpor x4 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x0 ## 1, x4 ## 1; \
	vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
	vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
	get_key(i, 1, RK1); \
	/* set 2: same step */ \
	vpslld $1, x1 ## 2, x4 ## 2; \
	vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
	vpor x4 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x0 ## 2, x4 ## 2; \
	vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
	vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
	get_key(i, 3, RK3); \
	/* set 1: x3 = rol(x3, 7); x0 ^= x1 ^ x3; x2 ^= x3 ^ (x1 << 7) */ \
	vpslld $7, x3 ## 1, x4 ## 1; \
	vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
	vpor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpslld $7, x1 ## 1, x4 ## 1; \
	vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
	get_key(i, 0, RK0); \
	/* set 2: same step */ \
	vpslld $7, x3 ## 2, x4 ## 2; \
	vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
	vpor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpslld $7, x1 ## 2, x4 ## 2; \
	vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
	get_key(i, 2, RK2); \
	/* set 1: key XOR on x1/x3; x0 = rol(x0, 5); x2 = rol(x2, 22); key XOR on x0/x2 */ \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpslld $5, x0 ## 1, x4 ## 1; \
	vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpslld $22, x2 ## 1, x4 ## 1; \
	vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	/* set 2: same step */ \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2; \
	vpslld $5, x0 ## 2, x4 ## 2; \
	vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpslld $22, x2 ## 2, x4 ## 2; \
	vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2;

/*
 * KL2: XOR with RK0..RK3 followed by the right-rotate counterpart of the
 * LK2 mixing (decryption direction).
 *
 * The parameter i is NOT used in the body: KL2 does not load any key word
 * itself.  RK0..RK3 must already hold the key words to apply; in the
 * decryption routine the preceding SP() loads them.  x4 is scratch.
 */
#define KL2(x0, x1, x2, x3, x4, i) \
	/* set 1: key XOR; x0 = ror(x0, 5); x2 = ror(x2, 22); x2 ^= x3 */ \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpsrld $5, x0 ## 1, x4 ## 1; \
	vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpsrld $22, x2 ## 1, x4 ## 1; \
	vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
	/* set 2: same step */ \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2; \
	vpsrld $5, x0 ## 2, x4 ## 2; \
	vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpsrld $22, x2 ## 2, x4 ## 2; \
	vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
	/* set 1: x0 ^= x3 ^ x1; x2 ^= (x1 << 7); x1 = ror(x1, 1) */ \
	vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
	vpslld $7, x1 ## 1, x4 ## 1; \
	vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpsrld $1, x1 ## 1, x4 ## 1; \
	vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
	vpor x4 ## 1, x1 ## 1, x1 ## 1; \
	/* set 2: same step */ \
	vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
	vpslld $7, x1 ## 2, x4 ## 2; \
	vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpsrld $1, x1 ## 2, x4 ## 2; \
	vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
	vpor x4 ## 2, x1 ## 2, x1 ## 2; \
	/* set 1: x3 = ror(x3, 7); x1 ^= x0; x3 ^= (x0 << 3) */ \
	vpsrld $7, x3 ## 1, x4 ## 1; \
	vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
	vpor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x0 ## 1, x4 ## 1; \
	vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
	/* set 2: same step */ \
	vpsrld $7, x3 ## 2, x4 ## 2; \
	vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
	vpor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x0 ## 2, x4 ## 2; \
	vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
	/* set 1: x0 = ror(x0, 13); x1 ^= x2; x3 ^= x2; x2 = ror(x2, 3) */ \
	vpsrld $13, x0 ## 1, x4 ## 1; \
	vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
	vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
	vpsrld $3, x2 ## 1, x4 ## 1; \
	vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	/* set 2: same step */ \
	vpsrld $13, x0 ## 2, x4 ## 2; \
	vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
	vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
	vpsrld $3, x2 ## 2, x4 ## 2; \
	vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2;

/*
 * S: apply S-box SBOX (both halves) to register set 1, then to set 2.
 * The _1/_2 halves of one set stay adjacent because they share tp.
 */
#define S(SBOX, x0, x1, x2, x3, x4) \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);

/*
 * SP: like S(), but additionally loads the four key words of index i into
 * RK0..RK3, one load in front of each S-box half.  On exit RK0..RK3 hold
 * the key words of index i (this is what KL2 consumes).
 *
 * The last line deliberately has no trailing backslash, so the macro ends
 * here regardless of what follows it in the file.
 */
#define SP(SBOX, x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 2, RK2); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 3, RK3); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	get_key(i, 1, RK1); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);

/*
 * transpose_4x4: treat x0..x3 as the rows of a 4x4 matrix of 32-bit words
 * and transpose it in place.  The unpack instructions work on each 128-bit
 * half of the ymm registers independently, so two 4x4 matrices (low and
 * high half) are transposed at once.
 *
 * Clobbers t0, t1, t2.  x3 is reused as a temporary after its last read as
 * an input, so the statement order must be kept.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	vpunpckldq x1, x0, t0; \
	vpunpckhdq x1, x0, t2; \
	vpunpckldq x3, x2, t1; \
	vpunpckhdq x3, x2, x3; \
	\
	vpunpcklqdq t1, t0, x0; \
	vpunpckhqdq t1, t0, x1; \
	vpunpcklqdq x3, t2, x2; \
	vpunpckhqdq x3, t2, x3;

/* Convert between block layout and the word-sliced layout used by the rounds. */
#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

.align 8
SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
	 * output:
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
	 *
	 * Data is passed in registers only; this routine reads memory solely
	 * through get_key() (relative to CTX), writes no memory and touches
	 * no general-purpose register.
	 * Clobbers: RNOT, tp, RE1, RE2, RK0..RK3 (so, together with the
	 * in/out registers, all of %ymm0..%ymm15).
	 *
	 * Structure: key index 0 is mixed in first, then 32 rounds follow,
	 * cycling through S0..S7.  Rounds 1..31 end with LK2 (linear mixing
	 * plus the next key); the last round ends with a plain key XOR (K2)
	 * using key index 32.  The register stems passed to each macro are
	 * renamed from round to round and must be kept exactly as listed.
	 */

	/* RNOT = all ones (needed by the S-box macros for bitwise NOT). */
	vpcmpeqd RNOT, RNOT, RNOT;

	read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);

						 K2(RA, RB, RC, RD, RE, 0);
	S(S0, RA, RB, RC, RD, RE);		LK2(RC, RB, RD, RA, RE, 1);
	S(S1, RC, RB, RD, RA, RE);		LK2(RE, RD, RA, RC, RB, 2);
	S(S2, RE, RD, RA, RC, RB);		LK2(RB, RD, RE, RC, RA, 3);
	S(S3, RB, RD, RE, RC, RA);		LK2(RC, RA, RD, RB, RE, 4);
	S(S4, RC, RA, RD, RB, RE);		LK2(RA, RD, RB, RE, RC, 5);
	S(S5, RA, RD, RB, RE, RC);		LK2(RC, RA, RD, RE, RB, 6);
	S(S6, RC, RA, RD, RE, RB);		LK2(RD, RB, RA, RE, RC, 7);
	S(S7, RD, RB, RA, RE, RC);		LK2(RC, RA, RE, RD, RB, 8);
	S(S0, RC, RA, RE, RD, RB);		LK2(RE, RA, RD, RC, RB, 9);
	S(S1, RE, RA, RD, RC, RB);		LK2(RB, RD, RC, RE, RA, 10);
	S(S2, RB, RD, RC, RE, RA);		LK2(RA, RD, RB, RE, RC, 11);
	S(S3, RA, RD, RB, RE, RC);		LK2(RE, RC, RD, RA, RB, 12);
	S(S4, RE, RC, RD, RA, RB);		LK2(RC, RD, RA, RB, RE, 13);
	S(S5, RC, RD, RA, RB, RE);		LK2(RE, RC, RD, RB, RA, 14);
	S(S6, RE, RC, RD, RB, RA);		LK2(RD, RA, RC, RB, RE, 15);
	S(S7, RD, RA, RC, RB, RE);		LK2(RE, RC, RB, RD, RA, 16);
	S(S0, RE, RC, RB, RD, RA);		LK2(RB, RC, RD, RE, RA, 17);
	S(S1, RB, RC, RD, RE, RA);		LK2(RA, RD, RE, RB, RC, 18);
	S(S2, RA, RD, RE, RB, RC);		LK2(RC, RD, RA, RB, RE, 19);
	S(S3, RC, RD, RA, RB, RE);		LK2(RB, RE, RD, RC, RA, 20);
	S(S4, RB, RE, RD, RC, RA);		LK2(RE, RD, RC, RA, RB, 21);
	S(S5, RE, RD, RC, RA, RB);		LK2(RB, RE, RD, RA, RC, 22);
	S(S6, RB, RE, RD, RA, RC);		LK2(RD, RC, RE, RA, RB, 23);
	S(S7, RD, RC, RE, RA, RB);		LK2(RB, RE, RA, RD, RC, 24);
	S(S0, RB, RE, RA, RD, RC);		LK2(RA, RE, RD, RB, RC, 25);
	S(S1, RA, RE, RD, RB, RC);		LK2(RC, RD, RB, RA, RE, 26);
	S(S2, RC, RD, RB, RA, RE);		LK2(RE, RD, RC, RA, RB, 27);
	S(S3, RE, RD, RC, RA, RB);		LK2(RA, RB, RD, RE, RC, 28);
	S(S4, RA, RB, RD, RE, RC);		LK2(RB, RD, RE, RC, RA, 29);
	S(S5, RB, RD, RE, RC, RA);		LK2(RA, RB, RD, RC, RE, 30);
	S(S6, RA, RB, RD, RC, RE);		LK2(RD, RE, RB, RC, RA, 31);
	S(S7, RD, RE, RB, RC, RA);		 K2(RA, RB, RC, RD, RE, 32);

	write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	RET;
SYM_FUNC_END(__serpent_enc_blk16)

.align 8
SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
	 * output:
	 *	RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext
	 */

	/* RNOT = all ones (needed by the S-box macros for bitwise NOT). */
	vpcmpeqd RNOT, RNOT, RNOT;

	read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	/*
	 * Each SP() loads the key words that the KL2() on the same line
	 * consumes; KL2 itself ignores its index argument.
	 */
						 K2(RA, RB, RC, RD, RE, 32);
	SP(SI7, RA, RB, RC, RD, RE, 31);	KL2(RB, RD, RA, RE, RC, 31);
	SP(SI6, RB, RD, RA, RE, RC, 30);	KL2(RA, RC, RE, RB, RD, 30);
	SP(SI5, RA, RC, RE, RB, RD, 29);	KL2(RC, RD, RA, RE, RB, 29);
	SP(SI4, RC, RD, RA, RE, RB, 28);	KL2(RC, RA, RB, RE, RD, 28);
	SP(SI3, RC, RA, RB, RE, RD, 27);	KL2(RB, RC, RD, RE, RA, 27);
	SP(SI2, RB, RC, RD, RE, RA, 26);	KL2(RC, RA, RE, RD, RB, 26);
	SP(SI1, RC, RA, RE, RD, RB, 25);	KL2(RB, RA, RE, RD, RC, 25);
	SP(SI0, RB, RA, RE, RD, RC, 24);	KL2(RE, RC, RA, RB, RD, 24);
	SP(SI7, RE, RC, RA, RB, RD, 23);	KL2(RC, RB, RE, RD, RA, 23);
	SP(SI6, RC, RB, RE, RD, RA, 22);	KL2(RE, RA, RD, RC, RB, 22);
	SP(SI5, RE, RA, RD, RC, RB, 21);	KL2(RA, RB, RE, RD, RC, 21);
	SP(SI4, RA, RB, RE, RD, RC, 20);	KL2(RA, RE, RC, RD, RB, 20);
	SP(SI3, RA, RE, RC, RD, RB, 19);	KL2(RC, RA, RB, RD, RE, 19);
	SP(SI2, RC, RA, RB, RD, RE, 18);	KL2(RA, RE, RD, RB, RC, 18);
	SP(SI1, RA, RE, RD, RB, RC, 17);	KL2(RC, RE, RD, RB, RA, 17);
	SP(SI0, RC, RE, RD, RB, RA, 16);	KL2(RD, RA, RE, RC, RB, 16);
	SP(SI7, RD, RA, RE, RC, RB, 15);	KL2(RA, RC, RD, RB, RE, 15);
	SP(SI6, RA, RC, RD, RB, RE, 14);	KL2(RD, RE, RB, RA, RC, 14);
	SP(SI5, RD, RE, RB, RA, RC, 13);	KL2(RE, RC, RD, RB, RA, 13);
	SP(SI4, RE, RC, RD, RB, RA, 12);	KL2(RE, RD, RA, RB, RC, 12);
	SP(SI3, RE, RD, RA, RB, RC, 11);	KL2(RA, RE, RC, RB, RD, 11);
	SP(SI2, RA, RE, RC, RB, RD, 10);	KL2(RE, RD, RB, RC, RA, 10);
	SP(SI1, RE, RD, RB, RC, RA, 9);		KL2(RA, RD, RB, RC, RE, 9);
	SP(SI0, RA, RD, RB, RC, RE, 8);		KL2(RB, RE, RD, RA, RC, 8);
	SP(SI7, RB, RE, RD, RA, RC, 7);		KL2(RE, RA, RB, RC, RD, 7);
	SP(SI6, RE, RA, RB, RC, RD, 6);		KL2(RB, RD, RC, RE, RA, 6);
	SP(SI5, RB, RD, RC, RE, RA, 5);		KL2(RD, RA, RB, RC, RE, 5);
	SP(SI4, RD, RA, RB, RC, RE, 4);		KL2(RD, RB, RE, RC, RA, 4);
	SP(SI3, RD, RB, RE, RC, RA, 3);		KL2(RE, RD, RA, RC, RB, 3);
SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); 6628c2ecf20Sopenharmony_ci SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); 6638c2ecf20Sopenharmony_ci S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); 6668c2ecf20Sopenharmony_ci write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_ci RET; 6698c2ecf20Sopenharmony_ciSYM_FUNC_END(__serpent_dec_blk16) 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ciSYM_FUNC_START(serpent_ecb_enc_16way) 6728c2ecf20Sopenharmony_ci /* input: 6738c2ecf20Sopenharmony_ci * %rdi: ctx, CTX 6748c2ecf20Sopenharmony_ci * %rsi: dst 6758c2ecf20Sopenharmony_ci * %rdx: src 6768c2ecf20Sopenharmony_ci */ 6778c2ecf20Sopenharmony_ci FRAME_BEGIN 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci vzeroupper; 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_ci load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_ci call __serpent_enc_blk16; 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci vzeroupper; 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci FRAME_END 6908c2ecf20Sopenharmony_ci RET; 6918c2ecf20Sopenharmony_ciSYM_FUNC_END(serpent_ecb_enc_16way) 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ciSYM_FUNC_START(serpent_ecb_dec_16way) 6948c2ecf20Sopenharmony_ci /* input: 6958c2ecf20Sopenharmony_ci * %rdi: ctx, CTX 6968c2ecf20Sopenharmony_ci * %rsi: dst 6978c2ecf20Sopenharmony_ci * %rdx: src 6988c2ecf20Sopenharmony_ci */ 6998c2ecf20Sopenharmony_ci FRAME_BEGIN 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci vzeroupper; 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ci call 
__serpent_dec_blk16; 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_ci store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci vzeroupper; 7108c2ecf20Sopenharmony_ci 7118c2ecf20Sopenharmony_ci FRAME_END 7128c2ecf20Sopenharmony_ci RET; 7138c2ecf20Sopenharmony_ciSYM_FUNC_END(serpent_ecb_dec_16way) 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ciSYM_FUNC_START(serpent_cbc_dec_16way) 7168c2ecf20Sopenharmony_ci /* input: 7178c2ecf20Sopenharmony_ci * %rdi: ctx, CTX 7188c2ecf20Sopenharmony_ci * %rsi: dst 7198c2ecf20Sopenharmony_ci * %rdx: src 7208c2ecf20Sopenharmony_ci */ 7218c2ecf20Sopenharmony_ci FRAME_BEGIN 7228c2ecf20Sopenharmony_ci 7238c2ecf20Sopenharmony_ci vzeroupper; 7248c2ecf20Sopenharmony_ci 7258c2ecf20Sopenharmony_ci load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci call __serpent_dec_blk16; 7288c2ecf20Sopenharmony_ci 7298c2ecf20Sopenharmony_ci store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2, 7308c2ecf20Sopenharmony_ci RK0); 7318c2ecf20Sopenharmony_ci 7328c2ecf20Sopenharmony_ci vzeroupper; 7338c2ecf20Sopenharmony_ci 7348c2ecf20Sopenharmony_ci FRAME_END 7358c2ecf20Sopenharmony_ci RET; 7368c2ecf20Sopenharmony_ciSYM_FUNC_END(serpent_cbc_dec_16way) 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ciSYM_FUNC_START(serpent_ctr_16way) 7398c2ecf20Sopenharmony_ci /* input: 7408c2ecf20Sopenharmony_ci * %rdi: ctx, CTX 7418c2ecf20Sopenharmony_ci * %rsi: dst (16 blocks) 7428c2ecf20Sopenharmony_ci * %rdx: src (16 blocks) 7438c2ecf20Sopenharmony_ci * %rcx: iv (little endian, 128bit) 7448c2ecf20Sopenharmony_ci */ 7458c2ecf20Sopenharmony_ci FRAME_BEGIN 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci vzeroupper; 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, 7508c2ecf20Sopenharmony_ci RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, 
7518c2ecf20Sopenharmony_ci tp); 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_ci call __serpent_enc_blk16; 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_ci store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci vzeroupper; 7588c2ecf20Sopenharmony_ci 7598c2ecf20Sopenharmony_ci FRAME_END 7608c2ecf20Sopenharmony_ci RET; 7618c2ecf20Sopenharmony_ciSYM_FUNC_END(serpent_ctr_16way) 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ciSYM_FUNC_START(serpent_xts_enc_16way) 7648c2ecf20Sopenharmony_ci /* input: 7658c2ecf20Sopenharmony_ci * %rdi: ctx, CTX 7668c2ecf20Sopenharmony_ci * %rsi: dst (16 blocks) 7678c2ecf20Sopenharmony_ci * %rdx: src (16 blocks) 7688c2ecf20Sopenharmony_ci * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) 7698c2ecf20Sopenharmony_ci */ 7708c2ecf20Sopenharmony_ci FRAME_BEGIN 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci vzeroupper; 7738c2ecf20Sopenharmony_ci 7748c2ecf20Sopenharmony_ci load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, 7758c2ecf20Sopenharmony_ci RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, 7768c2ecf20Sopenharmony_ci .Lxts_gf128mul_and_shl1_mask_0, 7778c2ecf20Sopenharmony_ci .Lxts_gf128mul_and_shl1_mask_1); 7788c2ecf20Sopenharmony_ci 7798c2ecf20Sopenharmony_ci call __serpent_enc_blk16; 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_ci store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci vzeroupper; 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci FRAME_END 7868c2ecf20Sopenharmony_ci RET; 7878c2ecf20Sopenharmony_ciSYM_FUNC_END(serpent_xts_enc_16way) 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ciSYM_FUNC_START(serpent_xts_dec_16way) 7908c2ecf20Sopenharmony_ci /* input: 7918c2ecf20Sopenharmony_ci * %rdi: ctx, CTX 7928c2ecf20Sopenharmony_ci * %rsi: dst (16 blocks) 7938c2ecf20Sopenharmony_ci * %rdx: src (16 blocks) 7948c2ecf20Sopenharmony_ci * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) 
7958c2ecf20Sopenharmony_ci */ 7968c2ecf20Sopenharmony_ci FRAME_BEGIN 7978c2ecf20Sopenharmony_ci 7988c2ecf20Sopenharmony_ci vzeroupper; 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, 8018c2ecf20Sopenharmony_ci RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, 8028c2ecf20Sopenharmony_ci .Lxts_gf128mul_and_shl1_mask_0, 8038c2ecf20Sopenharmony_ci .Lxts_gf128mul_and_shl1_mask_1); 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_ci call __serpent_dec_blk16; 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 8088c2ecf20Sopenharmony_ci 8098c2ecf20Sopenharmony_ci vzeroupper; 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci FRAME_END 8128c2ecf20Sopenharmony_ci RET; 8138c2ecf20Sopenharmony_ciSYM_FUNC_END(serpent_xts_dec_16way) 814