/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * x86_64/AVX2 assembler optimized version of Serpent
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * Based on AVX assembler implementation of Serpent by:
 *  Copyright © 2012 Johannes Goetzfried
 *      <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 */

#include <linux/linkage.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx2.S"

.file "serpent-avx2-asm_64.S"

/*
 * 16-byte constant holding the byte indices 15..0 in descending order.
 * NOTE(review): not referenced in this part of the file; presumably a
 * byte-shuffle mask that reverses a 128-bit value - confirm against its
 * users further down.
 */
.section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

.text

/* Pointer to the key context; round-key words are read relative to it. */
#define CTX %rdi

/*
 * RNOT: set to all ones (vpcmpeqd RNOT, RNOT, RNOT) at function entry, so
 *       "vpxor RNOT, x, y" computes y = ~x.
 * tp:   scratch register private to the S-box macros.
 */
#define RNOT %ymm0
#define tp  %ymm1

/*
 * Two independent register sets (suffix 1 and suffix 2) of five state
 * registers each.  The round macros take the base names (RA..RE) and paste
 * the suffix on with ##.
 */
#define RA1 %ymm2
#define RA2 %ymm3
#define RB1 %ymm4
#define RB2 %ymm5
#define RC1 %ymm6
#define RC2 %ymm7
#define RD1 %ymm8
#define RD2 %ymm9
#define RE1 %ymm10
#define RE2 %ymm11

/*
 * Round-key registers, filled by get_key().  They are also passed as
 * temporaries to read_blocks()/write_blocks().
 */
#define RK0 %ymm12
#define RK1 %ymm13
#define RK2 %ymm14
#define RK3 %ymm15

#define RK0x %xmm12
#define RK1x %xmm13
#define RK2x %xmm14
#define RK3x %xmm15

/*
 * S-box macros.
 *
 * Each S<n> (forward) and SI<n> (inverse) network is split into two halves,
 * <name>_1 and <name>_2, built only from vpand/vpor/vpxor.  Both halves work
 * in place on x0..x3; x4 receives a new value and its incoming contents are
 * never read.  Several networks keep a live value in tp between the two
 * halves, so _1 and _2 must be issued back to back on the same register set
 * with nothing in between that writes tp (see S() and SP()).
 */
#define S0_1(x0, x1, x2, x3, x4) \
	vpor x0, x3, tp; \
	vpxor x3, x0, x0; \
	vpxor x2, x3, x4; \
	vpxor RNOT, x4, x4; \
	vpxor x1, tp, x3; \
	vpand x0, x1, x1; \
	vpxor x4, x1, x1; \
	vpxor x0, x2, x2;
#define S0_2(x0, x1, x2, x3, x4) \
	vpxor x3, x0, x0; \
	vpor x0, x4, x4; \
	vpxor x2, x0, x0; \
	vpand x1, x2, x2; \
	vpxor x2, x3, x3; \
	vpxor RNOT, x1, x1; \
	vpxor x4, x2, x2; \
	vpxor x2, x1, x1;

#define S1_1(x0, x1, x2, x3, x4) \
	vpxor x0, x1, tp; \
	vpxor x3, x0, x0; \
	vpxor RNOT, x3, x3; \
	vpand tp, x1, x4; \
	vpor tp, x0, x0; \
	vpxor x2, x3, x3; \
	vpxor x3, x0, x0; \
	vpxor x3, tp, x1;
#define S1_2(x0, x1, x2, x3, x4) \
	vpxor x4, x3, x3; \
	vpor x4, x1, x1; \
	vpxor x2, x4, x4; \
	vpand x0, x2, x2; \
	vpxor x1, x2, x2; \
	vpor x0, x1, x1; \
	vpxor RNOT, x0, x0; \
	vpxor x2, x0, x0; \
	vpxor x1, x4, x4;

#define S2_1(x0, x1, x2, x3, x4) \
	vpxor RNOT, x3, x3; \
	vpxor x0, x1, x1; \
	vpand x2, x0, tp; \
	vpxor x3, tp, tp; \
	vpor x0, x3, x3; \
	vpxor x1, x2, x2; \
	vpxor x1, x3, x3; \
	vpand tp, x1, x1;
#define S2_2(x0, x1, x2, x3, x4) \
	vpxor x2, tp, tp; \
	vpand x3, x2, x2; \
	vpor x1, x3, x3; \
	vpxor RNOT, tp, tp; \
	vpxor tp, x3, x3; \
	vpxor tp, x0, x4; \
	vpxor x2, tp, x0; \
	vpor x2, x1, x1;

#define S3_1(x0, x1, x2, x3, x4) \
	vpxor x3, x1, tp; \
	vpor x0, x3, x3; \
	vpand x0, x1, x4; \
	vpxor x2, x0, x0; \
	vpxor tp, x2, x2; \
	vpand x3, tp, x1; \
	vpxor x3, x2, x2; \
	vpor x4, x0, x0; \
	vpxor x3, x4, x4;
#define S3_2(x0, x1, x2, x3, x4) \
	vpxor x0, x1, x1; \
	vpand x3, x0, x0; \
	vpand x4, x3, x3; \
	vpxor x2, x3, x3; \
	vpor x1, x4, x4; \
	vpand x1, x2, x2; \
	vpxor x3, x4, x4; \
	vpxor x3, x0, x0; \
	vpxor x2, x3, x3;

#define S4_1(x0, x1, x2, x3, x4) \
	vpand x0, x3, tp; \
	vpxor x3, x0, x0; \
	vpxor x2, tp, tp; \
	vpor x3, x2, x2; \
	vpxor x1, x0, x0; \
	vpxor tp, x3, x4; \
	vpor x0, x2, x2; \
	vpxor x1, x2, x2;
#define S4_2(x0, x1, x2, x3, x4) \
	vpand x0, x1, x1; \
	vpxor x4, x1, x1; \
	vpand x2, x4, x4; \
	vpxor tp, x2, x2; \
	vpxor x0, x4, x4; \
	vpor x1, tp, x3; \
	vpxor RNOT, x1, x1; \
	vpxor x0, x3, x3;

#define S5_1(x0, x1, x2, x3, x4) \
	vpor x0, x1, tp; \
	vpxor tp, x2, x2; \
	vpxor RNOT, x3, x3; \
	vpxor x0, x1, x4; \
	vpxor x2, x0, x0; \
	vpand x4, tp, x1; \
	vpor x3, x4, x4; \
	vpxor x0, x4, x4;
#define S5_2(x0, x1, x2, x3, x4) \
	vpand x3, x0, x0; \
	vpxor x3, x1, x1; \
	vpxor x2, x3, x3; \
	vpxor x1, x0, x0; \
	vpand x4, x2, x2; \
	vpxor x2, x1, x1; \
	vpand x0, x2, x2; \
	vpxor x2, x3, x3;

#define S6_1(x0, x1, x2, x3, x4) \
	vpxor x0, x3, x3; \
	vpxor x2, x1, tp; \
	vpxor x0, x2, x2; \
	vpand x3, x0, x0; \
	vpor x3, tp, tp; \
	vpxor RNOT, x1, x4; \
	vpxor tp, x0, x0; \
	vpxor x2, tp, x1;
#define S6_2(x0, x1, x2, x3, x4) \
	vpxor x4, x3, x3; \
	vpxor x0, x4, x4; \
	vpand x0, x2, x2; \
	vpxor x1, x4, x4; \
	vpxor x3, x2, x2; \
	vpand x1, x3, x3; \
	vpxor x0, x3, x3; \
	vpxor x2, x1, x1;

#define S7_1(x0, x1, x2, x3, x4) \
	vpxor RNOT, x1, tp; \
	vpxor RNOT, x0, x0; \
	vpand x2, tp, x1; \
	vpxor x3, x1, x1; \
	vpor tp, x3, x3; \
	vpxor x2, tp, x4; \
	vpxor x3, x2, x2; \
	vpxor x0, x3, x3; \
	vpor x1, x0, x0;
#define S7_2(x0, x1, x2, x3, x4) \
	vpand x0, x2, x2; \
	vpxor x4, x0, x0; \
	vpxor x3, x4, x4; \
	vpand x0, x3, x3; \
	vpxor x1, x4, x4; \
	vpxor x4, x2, x2; \
	vpxor x1, x3, x3; \
	vpor x0, x4, x4; \
	vpxor x1, x4, x4;

#define SI0_1(x0, x1, x2, x3, x4) \
	vpxor x0, x1, x1; \
	vpor x1, x3, tp; \
	vpxor x1, x3, x4; \
	vpxor RNOT, x0, x0; \
	vpxor tp, x2, x2; \
	vpxor x0, tp, x3; \
	vpand x1, x0, x0; \
	vpxor x2, x0, x0;
#define SI0_2(x0, x1, x2, x3, x4) \
	vpand x3, x2, x2; \
	vpxor x4, x3, x3; \
	vpxor x3, x2, x2; \
	vpxor x3, x1, x1; \
	vpand x0, x3, x3; \
	vpxor x0, x1, x1; \
	vpxor x2, x0, x0; \
	vpxor x3, x4, x4;

#define SI1_1(x0, x1, x2, x3, x4) \
	vpxor x3, x1, x1; \
	vpxor x2, x0, tp; \
	vpxor RNOT, x2, x2; \
	vpor x1, x0, x4; \
	vpxor x3, x4, x4; \
	vpand x1, x3, x3; \
	vpxor x2, x1, x1; \
	vpand x4, x2, x2;
#define SI1_2(x0, x1, x2, x3, x4) \
	vpxor x1, x4, x4; \
	vpor x3, x1, x1; \
	vpxor tp, x3, x3; \
	vpxor tp, x2, x2; \
	vpor x4, tp, x0; \
	vpxor x4, x2, x2; \
	vpxor x0, x1, x1; \
	vpxor x1, x4, x4;

#define SI2_1(x0, x1, x2, x3, x4) \
	vpxor x1, x2, x2; \
	vpxor RNOT, x3, tp; \
	vpor x2, tp, tp; \
	vpxor x3, x2, x2; \
	vpxor x0, x3, x4; \
	vpxor x1, tp, x3; \
	vpor x2, x1, x1; \
	vpxor x0, x2, x2;
#define SI2_2(x0, x1, x2, x3, x4) \
	vpxor x4, x1, x1; \
	vpor x3, x4, x4; \
	vpxor x3, x2, x2; \
	vpxor x2, x4, x4; \
	vpand x1, x2, x2; \
	vpxor x3, x2, x2; \
	vpxor x4, x3, x3; \
	vpxor x0, x4, x4;

#define SI3_1(x0, x1, x2, x3, x4) \
	vpxor x1, x2, x2; \
	vpand x2, x1, tp; \
	vpxor x0, tp, tp; \
	vpor x1, x0, x0; \
	vpxor x3, x1, x4; \
	vpxor x3, x0, x0; \
	vpor tp, x3, x3; \
	vpxor x2, tp, x1;
#define SI3_2(x0, x1, x2, x3, x4) \
	vpxor x3, x1, x1; \
	vpxor x2, x0, x0; \
	vpxor x3, x2, x2; \
	vpand x1, x3, x3; \
	vpxor x0, x1, x1; \
	vpand x2, x0, x0; \
	vpxor x3, x4, x4; \
	vpxor x0, x3, x3; \
	vpxor x1, x0, x0;

#define SI4_1(x0, x1, x2, x3, x4) \
	vpxor x3, x2, x2; \
	vpand x1, x0, tp; \
	vpxor x2, tp, tp; \
	vpor x3, x2, x2; \
	vpxor RNOT, x0, x4; \
	vpxor tp, x1, x1; \
	vpxor x2, tp, x0; \
	vpand x4, x2, x2;
#define SI4_2(x0, x1, x2, x3, x4) \
	vpxor x0, x2, x2; \
	vpor x4, x0, x0; \
	vpxor x3, x0, x0; \
	vpand x2, x3, x3; \
	vpxor x3, x4, x4; \
	vpxor x1, x3, x3; \
	vpand x0, x1, x1; \
	vpxor x1, x4, x4; \
	vpxor x3, x0, x0;

#define SI5_1(x0, x1, x2, x3, x4) \
	vpor x2, x1, tp; \
	vpxor x1, x2, x2; \
	vpxor x3, tp, tp; \
	vpand x1, x3, x3; \
	vpxor x3, x2, x2; \
	vpor x0, x3, x3; \
	vpxor RNOT, x0, x0; \
	vpxor x2, x3, x3; \
	vpor x0, x2, x2;
#define SI5_2(x0, x1, x2, x3, x4) \
	vpxor tp, x1, x4; \
	vpxor x4, x2, x2; \
	vpand x0, x4, x4; \
	vpxor tp, x0, x0; \
	vpxor x3, tp, x1; \
	vpand x2, x0, x0; \
	vpxor x3, x2, x2; \
	vpxor x2, x0, x0; \
	vpxor x4, x2, x2; \
	vpxor x3, x4, x4;

#define SI6_1(x0, x1, x2, x3, x4) \
	vpxor x2, x0, x0; \
	vpand x3, x0, tp; \
	vpxor x3, x2, x2; \
	vpxor x2, tp, tp; \
	vpxor x1, x3, x3; \
	vpor x0, x2, x2; \
	vpxor x3, x2, x2; \
	vpand tp, x3, x3;
#define SI6_2(x0, x1, x2, x3, x4) \
	vpxor RNOT, tp, tp; \
	vpxor x1, x3, x3; \
	vpand x2, x1, x1; \
	vpxor tp, x0, x4; \
	vpxor x4, x3, x3; \
	vpxor x2, x4, x4; \
	vpxor x1, tp, x0; \
	vpxor x0, x2, x2;

#define SI7_1(x0, x1, x2, x3, x4) \
	vpand x0, x3, tp; \
	vpxor x2, x0, x0; \
	vpor x3, x2, x2; \
	vpxor x1, x3, x4; \
	vpxor RNOT, x0, x0; \
	vpor tp, x1, x1; \
	vpxor x0, x4, x4; \
	vpand x2, x0, x0; \
	vpxor x1, x0, x0;
#define SI7_2(x0, x1, x2, x3, x4) \
	vpand x2, x1, x1; \
	vpxor x2, tp, x3; \
	vpxor x3, x4, x4; \
	vpand x3, x2, x2; \
	vpor x0, x3, x3; \
	vpxor x4, x1, x1; \
	vpxor x4, x3, x3; \
	vpand x0, x4, x4; \
	vpxor x2, x4, x4;

/*
 * Broadcast one 32-bit key word into every dword lane of t.
 * The word is read from byte offset (4*i + j)*4 relative to CTX, i.e. word
 * j (0..3) of the four-word group with index i.
 */
#define get_key(i,j,t) \
	vpbroadcastd (4*(i)+(j))*4(CTX), t;

/*
 * Key mixing only: load the four key words of group i into RK0..RK3 and XOR
 * them into x0..x3 of both register sets.  x4 is accepted for symmetry with
 * the other round macros and is not touched.
 */
#define K2(x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	get_key(i, 1, RK1); \
	get_key(i, 2, RK2); \
	get_key(i, 3, RK3); \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2;

/*
 * Linear transformation followed by key mixing with key group i, applied to
 * both register sets.
 *
 * Each 32-bit left rotate by n is built as vpslld $n / vpsrld $(32 - n) /
 * vpor, with x4 of the same set as the temporary.  Rotate amounts used:
 * x0 by 13, x2 by 3, x1 by 1, x3 by 7, x0 by 5, x2 by 22.  The get_key()
 * loads are spread between the arithmetic of the two sets; all four are
 * issued before the first vpxor that reads an RKn register.
 */
#define LK2(x0, x1, x2, x3, x4, i) \
	vpslld $13, x0 ## 1, x4 ## 1; \
	vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x2 ## 1, x4 ## 1; \
	vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $13, x0 ## 2, x4 ## 2; \
	vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x2 ## 2, x4 ## 2; \
	vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $1, x1 ## 1, x4 ## 1; \
	vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
	vpor x4 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x0 ## 1, x4 ## 1; \
	vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
	vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
	get_key(i, 1, RK1); \
	vpslld $1, x1 ## 2, x4 ## 2; \
	vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
	vpor x4 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x0 ## 2, x4 ## 2; \
	vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
	vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
	get_key(i, 3, RK3); \
	vpslld $7, x3 ## 1, x4 ## 1; \
	vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
	vpor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpslld $7, x1 ## 1, x4 ## 1; \
	vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
	get_key(i, 0, RK0); \
	vpslld $7, x3 ## 2, x4 ## 2; \
	vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
	vpor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpslld $7, x1 ## 2, x4 ## 2; \
	vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
	get_key(i, 2, RK2); \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpslld $5, x0 ## 1, x4 ## 1; \
	vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpslld $22, x2 ## 1, x4 ## 1; \
	vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2; \
	vpslld $5, x0 ## 2, x4 ## 2; \
	vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpslld $22, x2 ## 2, x4 ## 2; \
	vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2;

/*
 * Key mixing followed by the inverse linear transformation, applied to both
 * register sets.
 *
 * This macro does NOT load any key words: RK0..RK3 must already hold the
 * key group to mix in (SP() loads them).  The i argument is therefore
 * unused here.  Each right rotate by n is vpsrld $n / vpslld $(32 - n) /
 * vpor with x4 of the same set as the temporary.
 */
#define KL2(x0, x1, x2, x3, x4, i) \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpsrld $5, x0 ## 1, x4 ## 1; \
	vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpsrld $22, x2 ## 1, x4 ## 1; \
	vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2; \
	vpsrld $5, x0 ## 2, x4 ## 2; \
	vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpsrld $22, x2 ## 2, x4 ## 2; \
	vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
	vpslld $7, x1 ## 1, x4 ## 1; \
	vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpsrld $1, x1 ## 1, x4 ## 1; \
	vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
	vpor x4 ## 1, x1 ## 1, x1 ## 1; \
	vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
	vpslld $7, x1 ## 2, x4 ## 2; \
	vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpsrld $1, x1 ## 2, x4 ## 2; \
	vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
	vpor x4 ## 2, x1 ## 2, x1 ## 2; \
	vpsrld $7, x3 ## 1, x4 ## 1; \
	vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
	vpor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x0 ## 1, x4 ## 1; \
	vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpsrld $7, x3 ## 2, x4 ## 2; \
	vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
	vpor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x0 ## 2, x4 ## 2; \
	vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpsrld $13, x0 ## 1, x4 ## 1; \
	vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
	vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
	vpsrld $3, x2 ## 1, x4 ## 1; \
	vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpsrld $13, x0 ## 2, x4 ## 2; \
	vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
	vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
	vpsrld $3, x2 ## 2, x4 ## 2; \
	vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2;

/*
 * Apply S-box SBOX (both halves) to register set 1, then to register set 2.
 * Set 1 is finished before set 2 starts because both share tp.
 */
#define S(SBOX, x0, x1, x2, x3, x4) \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);

/*
 * Same as S(), with the four get_key() loads of key group i interleaved
 * between the S-box halves.  On exit RK0..RK3 hold key group i, which is
 * what the following KL2() consumes.
 */
#define SP(SBOX, x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 2, RK2); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 3, RK3); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	get_key(i, 1, RK1); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);

/*
 * Transpose a 4x4 matrix of 32-bit elements held in x0..x3 (one row per
 * register), independently within each 128-bit lane.  t0..t2 are clobbered.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	vpunpckldq x1, x0, t0; \
	vpunpckhdq x1, x0, t2; \
	vpunpckldq x3, x2, t1; \
	vpunpckhdq x3, x2, x3; \
	\
	vpunpcklqdq t1, t0, x0; \
	vpunpckhqdq t1, t0, x1; \
	vpunpcklqdq x3, t2, x2; \
	vpunpckhqdq x3, t2, x3;

/*
 * Input and output layout conversion.  Both are the same 4x4 dword
 * transpose; the two names only mark the direction at the call site.
 */
#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

/*
 * Encrypt the blocks held in the two register sets, in place.
 *
 * Sequence: transpose in, K2 with key group 0, then 32 applications of
 * S0..S7 (cycling) each followed by LK2 with key groups 1..31, with the last
 * S7 followed by a plain K2 with key group 32, then transpose out.
 * The register names rotate from line to line because each S-box leaves its
 * outputs in a permuted set of registers.
 *
 * Clobbers: RNOT, tp, RE1, RE2, RK0..RK3.
 */
SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
	 * output:
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
	 */

	/* RNOT = all ones, used by the S-boxes as the NOT mask */
	vpcmpeqd RNOT, RNOT, RNOT;

	read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);

						 K2(RA, RB, RC, RD, RE, 0);
	S(S0, RA, RB, RC, RD, RE);		LK2(RC, RB, RD, RA, RE, 1);
	S(S1, RC, RB, RD, RA, RE);		LK2(RE, RD, RA, RC, RB, 2);
	S(S2, RE, RD, RA, RC, RB);		LK2(RB, RD, RE, RC, RA, 3);
	S(S3, RB, RD, RE, RC, RA);		LK2(RC, RA, RD, RB, RE, 4);
	S(S4, RC, RA, RD, RB, RE);		LK2(RA, RD, RB, RE, RC, 5);
	S(S5, RA, RD, RB, RE, RC);		LK2(RC, RA, RD, RE, RB, 6);
	S(S6, RC, RA, RD, RE, RB);		LK2(RD, RB, RA, RE, RC, 7);
	S(S7, RD, RB, RA, RE, RC);		LK2(RC, RA, RE, RD, RB, 8);
	S(S0, RC, RA, RE, RD, RB);		LK2(RE, RA, RD, RC, RB, 9);
	S(S1, RE, RA, RD, RC, RB);		LK2(RB, RD, RC, RE, RA, 10);
	S(S2, RB, RD, RC, RE, RA);		LK2(RA, RD, RB, RE, RC, 11);
	S(S3, RA, RD, RB, RE, RC);		LK2(RE, RC, RD, RA, RB, 12);
	S(S4, RE, RC, RD, RA, RB);		LK2(RC, RD, RA, RB, RE, 13);
	S(S5, RC, RD, RA, RB, RE);		LK2(RE, RC, RD, RB, RA, 14);
	S(S6, RE, RC, RD, RB, RA);		LK2(RD, RA, RC, RB, RE, 15);
	S(S7, RD, RA, RC, RB, RE);		LK2(RE, RC, RB, RD, RA, 16);
	S(S0, RE, RC, RB, RD, RA);		LK2(RB, RC, RD, RE, RA, 17);
	S(S1, RB, RC, RD, RE, RA);		LK2(RA, RD, RE, RB, RC, 18);
	S(S2, RA, RD, RE, RB, RC);		LK2(RC, RD, RA, RB, RE, 19);
	S(S3, RC, RD, RA, RB, RE);		LK2(RB, RE, RD, RC, RA, 20);
	S(S4, RB, RE, RD, RC, RA);		LK2(RE, RD, RC, RA, RB, 21);
	S(S5, RE, RD, RC, RA, RB);		LK2(RB, RE, RD, RA, RC, 22);
	S(S6, RB, RE, RD, RA, RC);		LK2(RD, RC, RE, RA, RB, 23);
	S(S7, RD, RC, RE, RA, RB);		LK2(RB, RE, RA, RD, RC, 24);
	S(S0, RB, RE, RA, RD, RC);		LK2(RA, RE, RD, RB, RC, 25);
	S(S1, RA, RE, RD, RB, RC);		LK2(RC, RD, RB, RA, RE, 26);
	S(S2, RC, RD, RB, RA, RE);		LK2(RE, RD, RC, RA, RB, 27);
	S(S3, RE, RD, RC, RA, RB);		LK2(RA, RB, RD, RE, RC, 28);
	S(S4, RA, RB, RD, RE, RC);		LK2(RB, RD, RE, RC, RA, 29);
	S(S5, RB, RD, RE, RC, RA);		LK2(RA, RB, RD, RC, RE, 30);
	S(S6, RA, RB, RD, RC, RE);		LK2(RD, RE, RB, RC, RA, 31);
	S(S7, RD, RE, RB, RC, RA);		 K2(RA, RB, RC, RD, RE, 32);

	write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	RET;
SYM_FUNC_END(__serpent_enc_blk16)

/*
 * Decrypt the blocks held in the two register sets.
 *
 * Sequence: transpose in, K2 with key group 32, then for i = 31 down to 1
 * an inverse S-box (SI7..SI0, cycling) via SP(), which also loads key group
 * i into RK0..RK3, followed by KL2(), which mixes those keys in and applies
 * the inverse linear transformation.  The final SI0 uses plain S() and is
 * followed by K2 with key group 0, then transpose out.
 * The register names rotate from line to line because each S-box leaves its
 * outputs in a permuted set of registers; this is why the result ends up in
 * RC, RD, RB, RE rather than in the input registers.
 *
 * Clobbers: RNOT, tp, RA1, RA2, RK0..RK3.
 */
SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
	 * output:
	 *	RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext
	 */

	/* RNOT = all ones, used by the S-boxes as the NOT mask */
	vpcmpeqd RNOT, RNOT, RNOT;

	read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);

						 K2(RA, RB, RC, RD, RE, 32);
	SP(SI7, RA, RB, RC, RD, RE, 31);	KL2(RB, RD, RA, RE, RC, 31);
	SP(SI6, RB, RD, RA, RE, RC, 30);	KL2(RA, RC, RE, RB, RD, 30);
	SP(SI5, RA, RC, RE, RB, RD, 29);	KL2(RC, RD, RA, RE, RB, 29);
	SP(SI4, RC, RD, RA, RE, RB, 28);	KL2(RC, RA, RB, RE, RD, 28);
	SP(SI3, RC, RA, RB, RE, RD, 27);	KL2(RB, RC, RD, RE, RA, 27);
	SP(SI2, RB, RC, RD, RE, RA, 26);	KL2(RC, RA, RE, RD, RB, 26);
	SP(SI1, RC, RA, RE, RD, RB, 25);	KL2(RB, RA, RE, RD, RC, 25);
	SP(SI0, RB, RA, RE, RD, RC, 24);	KL2(RE, RC, RA, RB, RD, 24);
	SP(SI7, RE, RC, RA, RB, RD, 23);	KL2(RC, RB, RE, RD, RA, 23);
	SP(SI6, RC, RB, RE, RD, RA, 22);	KL2(RE, RA, RD, RC, RB, 22);
	SP(SI5, RE, RA, RD, RC, RB, 21);	KL2(RA, RB, RE, RD, RC, 21);
	SP(SI4, RA, RB, RE, RD, RC, 20);	KL2(RA, RE, RC, RD, RB, 20);
	SP(SI3, RA, RE, RC, RD, RB, 19);	KL2(RC, RA, RB, RD, RE, 19);
	SP(SI2, RC, RA, RB, RD, RE, 18);	KL2(RA, RE, RD, RB, RC, 18);
	SP(SI1, RA, RE, RD, RB, RC, 17);	KL2(RC, RE, RD, RB, RA, 17);
	SP(SI0, RC, RE, RD, RB, RA, 16);	KL2(RD, RA, RE, RC, RB, 16);
	SP(SI7, RD, RA, RE, RC, RB, 15);	KL2(RA, RC, RD, RB, RE, 15);
	SP(SI6, RA, RC, RD, RB, RE, 14);	KL2(RD, RE, RB, RA, RC, 14);
	SP(SI5, RD, RE, RB, RA, RC, 13);	KL2(RE, RC, RD, RB, RA, 13);
	SP(SI4, RE, RC, RD, RB, RA, 12);	KL2(RE, RD, RA, RB, RC, 12);
	SP(SI3, RE, RD, RA, RB, RC, 11);	KL2(RA, RE, RC, RB, RD, 11);
	SP(SI2, RA, RE, RC, RB, RD, 10);	KL2(RE, RD, RB, RC, RA, 10);
	SP(SI1, RE, RD, RB, RC, RA, 9);		KL2(RA, RD, RB, RC, RE, 9);
	SP(SI0, RA, RD, RB, RC, RE, 8);		KL2(RB, RE, RD, RA, RC, 8);
	SP(SI7, RB, RE, RD, RA, RC, 7);		KL2(RE, RA, RB, RC, RD, 7);
	SP(SI6, RE, RA, RB, RC, RD, 6);		KL2(RB, RD, RC, RE, RA, 6);
	SP(SI5, RB, RD, RC, RE, RA, 5);		KL2(RD, RA, RB, RC, RE, 5);
	SP(SI4, RD, RA, RB, RC, RE, 4);		KL2(RD, RB, RE, RC, RA, 4);
	SP(SI3, RD, RB, RE, RC, RA, 3);		KL2(RE, RD, RA, RC, RB, 3);
	SP(SI2, RE, RD, RA, RC, RB, 2);		KL2(RD, RB, RC, RA, RE, 2);
	SP(SI1, RD, RB, RC, RA, RE, 1);		KL2(RE, RB, RC, RA, RD, 1);
	S(SI0, RE, RB, RC, RA, RD);		 K2(RC, RD, RB, RE, RA, 0);

	write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
	write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);

	RET;
SYM_FUNC_END(__serpent_dec_blk16)

SYM_FUNC_START(serpent_ecb_enc_16way)
	/* input:
	 *	%rdi:
ctx, CTX 66262306a36Sopenharmony_ci * %rsi: dst 66362306a36Sopenharmony_ci * %rdx: src 66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_ci FRAME_BEGIN 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci vzeroupper; 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci call __serpent_enc_blk16; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci vzeroupper; 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci FRAME_END 67862306a36Sopenharmony_ci RET; 67962306a36Sopenharmony_ciSYM_FUNC_END(serpent_ecb_enc_16way) 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ciSYM_FUNC_START(serpent_ecb_dec_16way) 68262306a36Sopenharmony_ci /* input: 68362306a36Sopenharmony_ci * %rdi: ctx, CTX 68462306a36Sopenharmony_ci * %rsi: dst 68562306a36Sopenharmony_ci * %rdx: src 68662306a36Sopenharmony_ci */ 68762306a36Sopenharmony_ci FRAME_BEGIN 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci vzeroupper; 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci call __serpent_dec_blk16; 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci vzeroupper; 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci FRAME_END 70062306a36Sopenharmony_ci RET; 70162306a36Sopenharmony_ciSYM_FUNC_END(serpent_ecb_dec_16way) 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ciSYM_FUNC_START(serpent_cbc_dec_16way) 70462306a36Sopenharmony_ci /* input: 70562306a36Sopenharmony_ci * %rdi: ctx, CTX 70662306a36Sopenharmony_ci * %rsi: dst 70762306a36Sopenharmony_ci * %rdx: src 70862306a36Sopenharmony_ci */ 70962306a36Sopenharmony_ci FRAME_BEGIN 
71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci vzeroupper; 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci call __serpent_dec_blk16; 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2, 71862306a36Sopenharmony_ci RK0); 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci vzeroupper; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci FRAME_END 72362306a36Sopenharmony_ci RET; 72462306a36Sopenharmony_ciSYM_FUNC_END(serpent_cbc_dec_16way) 725