/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2)
 *
 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * Based on crypto/serpent.c by
 *  Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
 *                2003 Herbert Valerio Riedel <hvr@gnu.org>
 */

#include <linux/linkage.h>

.file "serpent-sse2-x86_64-asm_64.S"
.text

/* First argument register: base address the key words are read from. */
#define CTX %rdi

/**********************************************************************
  8-way SSE2 serpent
 **********************************************************************/

/*
 * Working registers.
 *
 * There are two independent groups of five registers (suffix 1 and
 * suffix 2).  The K2/LK2/KL2/S/SP macros take the base names RA..RE and
 * paste the suffix on, so every step is applied to both groups.  The
 * dword shifts and shuffles used below treat each XMM register as four
 * 32-bit lanes.
 */
#define RA1 %xmm0
#define RB1 %xmm1
#define RC1 %xmm2
#define RD1 %xmm3
#define RE1 %xmm4

#define RA2 %xmm5
#define RB2 %xmm6
#define RC2 %xmm7
#define RD2 %xmm8
#define RE2 %xmm9

/*
 * All-ones mask, set up with pcmpeqd at the start of each routine;
 * "pxor RNOT, x" is therefore a bitwise NOT of x.
 */
#define RNOT %xmm10

/*
 * Key words broadcast by get_key().  The same registers are handed to
 * read_blocks/write_blocks/xor_blocks as scratch space.
 */
#define RK0 %xmm11
#define RK1 %xmm12
#define RK2 %xmm13
#define RK3 %xmm14

/*
 * S-box macros.
 *
 * S0..S7 are used by the encryption routine, SI0..SI7 by the decryption
 * routine.  Each one is a fixed sequence of pxor/pand/por operations on
 * x0..x4 and is split into two halves, <name>_1 followed by <name>_2, so
 * that SP() can interleave the halves of the two register groups.
 *
 * In every _1 half x4 is overwritten (movdqa) before it is read, so its
 * incoming value does not matter.  The results are left spread over
 * x0..x4; the call sites pass the registers to the following step in the
 * order that matches each S-box.
 */
#define S0_1(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	por x0, x3; \
	pxor x4, x0; \
	pxor x2, x4; \
	pxor RNOT, x4; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x4, x1; \
	pxor x0, x2;
#define S0_2(x0, x1, x2, x3, x4) \
	pxor x3, x0; \
	por x0, x4; \
	pxor x2, x0; \
	pand x1, x2; \
	pxor x2, x3; \
	pxor RNOT, x1; \
	pxor x4, x2; \
	pxor x2, x1;

#define S1_1(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x0, x1; \
	pxor x3, x0; \
	pxor RNOT, x3; \
	pand x1, x4; \
	por x1, x0; \
	pxor x2, x3; \
	pxor x3, x0; \
	pxor x3, x1;
#define S1_2(x0, x1, x2, x3, x4) \
	pxor x4, x3; \
	por x4, x1; \
	pxor x2, x4; \
	pand x0, x2; \
	pxor x1, x2; \
	por x0, x1; \
	pxor RNOT, x0; \
	pxor x2, x0; \
	pxor x1, x4;

#define S2_1(x0, x1, x2, x3, x4) \
	pxor RNOT, x3; \
	pxor x0, x1; \
	movdqa x0, x4; \
	pand x2, x0; \
	pxor x3, x0; \
	por x4, x3; \
	pxor x1, x2; \
	pxor x1, x3; \
	pand x0, x1;
#define S2_2(x0, x1, x2, x3, x4) \
	pxor x2, x0; \
	pand x3, x2; \
	por x1, x3; \
	pxor RNOT, x0; \
	pxor x0, x3; \
	pxor x0, x4; \
	pxor x2, x0; \
	por x2, x1;

#define S3_1(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x3, x1; \
	por x0, x3; \
	pand x0, x4; \
	pxor x2, x0; \
	pxor x1, x2; \
	pand x3, x1; \
	pxor x3, x2; \
	por x4, x0; \
	pxor x3, x4;
#define S3_2(x0, x1, x2, x3, x4) \
	pxor x0, x1; \
	pand x3, x0; \
	pand x4, x3; \
	pxor x2, x3; \
	por x1, x4; \
	pand x1, x2; \
	pxor x3, x4; \
	pxor x3, x0; \
	pxor x2, x3;

#define S4_1(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pand x0, x3; \
	pxor x4, x0; \
	pxor x2, x3; \
	por x4, x2; \
	pxor x1, x0; \
	pxor x3, x4; \
	por x0, x2; \
	pxor x1, x2;
#define S4_2(x0, x1, x2, x3, x4) \
	pand x0, x1; \
	pxor x4, x1; \
	pand x2, x4; \
	pxor x3, x2; \
	pxor x0, x4; \
	por x1, x3; \
	pxor RNOT, x1; \
	pxor x0, x3;

#define S5_1(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	por x0, x1; \
	pxor x1, x2; \
	pxor RNOT, x3; \
	pxor x0, x4; \
	pxor x2, x0; \
	pand x4, x1; \
	por x3, x4; \
	pxor x0, x4;
#define S5_2(x0, x1, x2, x3, x4) \
	pand x3, x0; \
	pxor x3, x1; \
	pxor x2, x3; \
	pxor x1, x0; \
	pand x4, x2; \
	pxor x2, x1; \
	pand x0, x2; \
	pxor x2, x3;

#define S6_1(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x0, x3; \
	pxor x2, x1; \
	pxor x0, x2; \
	pand x3, x0; \
	por x3, x1; \
	pxor RNOT, x4; \
	pxor x1, x0; \
	pxor x2, x1;
#define S6_2(x0, x1, x2, x3, x4) \
	pxor x4, x3; \
	pxor x0, x4; \
	pand x0, x2; \
	pxor x1, x4; \
	pxor x3, x2; \
	pand x1, x3; \
	pxor x0, x3; \
	pxor x2, x1;

#define S7_1(x0, x1, x2, x3, x4) \
	pxor RNOT, x1; \
	movdqa x1, x4; \
	pxor RNOT, x0; \
	pand x2, x1; \
	pxor x3, x1; \
	por x4, x3; \
	pxor x2, x4; \
	pxor x3, x2; \
	pxor x0, x3; \
	por x1, x0;
#define S7_2(x0, x1, x2, x3, x4) \
	pand x0, x2; \
	pxor x4, x0; \
	pxor x3, x4; \
	pand x0, x3; \
	pxor x1, x4; \
	pxor x4, x2; \
	pxor x1, x3; \
	por x0, x4; \
	pxor x1, x4;

#define SI0_1(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pxor x0, x1; \
	por x1, x3; \
	pxor x1, x4; \
	pxor RNOT, x0; \
	pxor x3, x2; \
	pxor x0, x3; \
	pand x1, x0; \
	pxor x2, x0;
#define SI0_2(x0, x1, x2, x3, x4) \
	pand x3, x2; \
	pxor x4, x3; \
	pxor x3, x2; \
	pxor x3, x1; \
	pand x0, x3; \
	pxor x0, x1; \
	pxor x2, x0; \
	pxor x3, x4;

#define SI1_1(x0, x1, x2, x3, x4) \
	pxor x3, x1; \
	movdqa x0, x4; \
	pxor x2, x0; \
	pxor RNOT, x2; \
	por x1, x4; \
	pxor x3, x4; \
	pand x1, x3; \
	pxor x2, x1; \
	pand x4, x2;
#define SI1_2(x0, x1, x2, x3, x4) \
	pxor x1, x4; \
	por x3, x1; \
	pxor x0, x3; \
	pxor x0, x2; \
	por x4, x0; \
	pxor x4, x2; \
	pxor x0, x1; \
	pxor x1, x4;

#define SI2_1(x0, x1, x2, x3, x4) \
	pxor x1, x2; \
	movdqa x3, x4; \
	pxor RNOT, x3; \
	por x2, x3; \
	pxor x4, x2; \
	pxor x0, x4; \
	pxor x1, x3; \
	por x2, x1; \
	pxor x0, x2;
#define SI2_2(x0, x1, x2, x3, x4) \
	pxor x4, x1; \
	por x3, x4; \
	pxor x3, x2; \
	pxor x2, x4; \
	pand x1, x2; \
	pxor x3, x2; \
	pxor x4, x3; \
	pxor x0, x4;

#define SI3_1(x0, x1, x2, x3, x4) \
	pxor x1, x2; \
	movdqa x1, x4; \
	pand x2, x1; \
	pxor x0, x1; \
	por x4, x0; \
	pxor x3, x4; \
	pxor x3, x0; \
	por x1, x3; \
	pxor x2, x1;
#define SI3_2(x0, x1, x2, x3, x4) \
	pxor x3, x1; \
	pxor x2, x0; \
	pxor x3, x2; \
	pand x1, x3; \
	pxor x0, x1; \
	pand x2, x0; \
	pxor x3, x4; \
	pxor x0, x3; \
	pxor x1, x0;

#define SI4_1(x0, x1, x2, x3, x4) \
	pxor x3, x2; \
	movdqa x0, x4; \
	pand x1, x0; \
	pxor x2, x0; \
	por x3, x2; \
	pxor RNOT, x4; \
	pxor x0, x1; \
	pxor x2, x0; \
	pand x4, x2;
#define SI4_2(x0, x1, x2, x3, x4) \
	pxor x0, x2; \
	por x4, x0; \
	pxor x3, x0; \
	pand x2, x3; \
	pxor x3, x4; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x1, x4; \
	pxor x3, x0;

#define SI5_1(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	por x2, x1; \
	pxor x4, x2; \
	pxor x3, x1; \
	pand x4, x3; \
	pxor x3, x2; \
	por x0, x3; \
	pxor RNOT, x0; \
	pxor x2, x3; \
	por x0, x2;
#define SI5_2(x0, x1, x2, x3, x4) \
	pxor x1, x4; \
	pxor x4, x2; \
	pand x0, x4; \
	pxor x1, x0; \
	pxor x3, x1; \
	pand x2, x0; \
	pxor x3, x2; \
	pxor x2, x0; \
	pxor x4, x2; \
	pxor x3, x4;

#define SI6_1(x0, x1, x2, x3, x4) \
	pxor x2, x0; \
	movdqa x0, x4; \
	pand x3, x0; \
	pxor x3, x2; \
	pxor x2, x0; \
	pxor x1, x3; \
	por x4, x2; \
	pxor x3, x2; \
	pand x0, x3;
#define SI6_2(x0, x1, x2, x3, x4) \
	pxor RNOT, x0; \
	pxor x1, x3; \
	pand x2, x1; \
	pxor x0, x4; \
	pxor x4, x3; \
	pxor x2, x4; \
	pxor x1, x0; \
	pxor x0, x2;

#define SI7_1(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pand x0, x3; \
	pxor x2, x0; \
	por x4, x2; \
	pxor x1, x4; \
	pxor RNOT, x0; \
	por x3, x1; \
	pxor x0, x4; \
	pand x2, x0; \
	pxor x1, x0;
#define SI7_2(x0, x1, x2, x3, x4) \
	pand x2, x1; \
	pxor x2, x3; \
	pxor x3, x4; \
	pand x3, x2; \
	por x0, x3; \
	pxor x4, x1; \
	pxor x4, x3; \
	pand x0, x4; \
	pxor x2, x4;

/*
 * get_key(i, j, t): load the 32-bit word at byte offset (4*i + j)*4 from
 * CTX into t and replicate it into all four dword lanes of t.
 */
#define get_key(i, j, t) \
	movd (4*(i)+(j))*4(CTX), t; \
	pshufd $0, t, t;

/*
 * K2: load the four key words with index i into RK0..RK3 and XOR them
 * into x0..x3 of both register groups.  x4 is not touched.
 */
#define K2(x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	get_key(i, 1, RK1); \
	get_key(i, 2, RK2); \
	get_key(i, 3, RK3); \
	pxor RK0, x0 ## 1; \
	pxor RK1, x1 ## 1; \
	pxor RK2, x2 ## 1; \
	pxor RK3, x3 ## 1; \
	pxor RK0, x0 ## 2; \
	pxor RK1, x1 ## 2; \
	pxor RK2, x2 ## 2; \
	pxor RK3, x3 ## 2;

/*
 * LK2: mix x0..x3 and then XOR in the key words with index i, for both
 * register groups.  Per 32-bit lane this computes
 *
 *	x0 = rol(x0, 13);	x2 = rol(x2, 3);
 *	x1 ^= x0 ^ x2;		x3 ^= x2 ^ (x0 << 3);
 *	x1 = rol(x1, 1);	x3 = rol(x3, 7);
 *	x0 ^= x1 ^ x3;		x2 ^= x3 ^ (x1 << 7);
 *	x0 = rol(x0, 5);	x2 = rol(x2, 22);
 *	x0 ^= RK0; x1 ^= RK1; x2 ^= RK2; x3 ^= RK3;
 *
 * Each rotate is built from a pslld/psrld pair joined with por, using x4
 * as scratch.  The four get_key() loads are spread between the
 * arithmetic of the two groups rather than issued up front.
 */
#define LK2(x0, x1, x2, x3, x4, i) \
	movdqa x0 ## 1, x4 ## 1; \
	pslld $13, x0 ## 1; \
	psrld $(32 - 13), x4 ## 1; \
	por x4 ## 1, x0 ## 1; \
	pxor x0 ## 1, x1 ## 1; \
	movdqa x2 ## 1, x4 ## 1; \
	pslld $3, x2 ## 1; \
	psrld $(32 - 3), x4 ## 1; \
	por x4 ## 1, x2 ## 1; \
	pxor x2 ## 1, x1 ## 1; \
	movdqa x0 ## 2, x4 ## 2; \
	pslld $13, x0 ## 2; \
	psrld $(32 - 13), x4 ## 2; \
	por x4 ## 2, x0 ## 2; \
	pxor x0 ## 2, x1 ## 2; \
	movdqa x2 ## 2, x4 ## 2; \
	pslld $3, x2 ## 2; \
	psrld $(32 - 3), x4 ## 2; \
	por x4 ## 2, x2 ## 2; \
	pxor x2 ## 2, x1 ## 2; \
	movdqa x1 ## 1, x4 ## 1; \
	pslld $1, x1 ## 1; \
	psrld $(32 - 1), x4 ## 1; \
	por x4 ## 1, x1 ## 1; \
	movdqa x0 ## 1, x4 ## 1; \
	pslld $3, x4 ## 1; \
	pxor x2 ## 1, x3 ## 1; \
	pxor x4 ## 1, x3 ## 1; \
	movdqa x3 ## 1, x4 ## 1; \
	get_key(i, 1, RK1); \
	movdqa x1 ## 2, x4 ## 2; \
	pslld $1, x1 ## 2; \
	psrld $(32 - 1), x4 ## 2; \
	por x4 ## 2, x1 ## 2; \
	movdqa x0 ## 2, x4 ## 2; \
	pslld $3, x4 ## 2; \
	pxor x2 ## 2, x3 ## 2; \
	pxor x4 ## 2, x3 ## 2; \
	movdqa x3 ## 2, x4 ## 2; \
	get_key(i, 3, RK3); \
	pslld $7, x3 ## 1; \
	psrld $(32 - 7), x4 ## 1; \
	por x4 ## 1, x3 ## 1; \
	movdqa x1 ## 1, x4 ## 1; \
	pslld $7, x4 ## 1; \
	pxor x1 ## 1, x0 ## 1; \
	pxor x3 ## 1, x0 ## 1; \
	pxor x3 ## 1, x2 ## 1; \
	pxor x4 ## 1, x2 ## 1; \
	get_key(i, 0, RK0); \
	pslld $7, x3 ## 2; \
	psrld $(32 - 7), x4 ## 2; \
	por x4 ## 2, x3 ## 2; \
	movdqa x1 ## 2, x4 ## 2; \
	pslld $7, x4 ## 2; \
	pxor x1 ## 2, x0 ## 2; \
	pxor x3 ## 2, x0 ## 2; \
	pxor x3 ## 2, x2 ## 2; \
	pxor x4 ## 2, x2 ## 2; \
	get_key(i, 2, RK2); \
	pxor RK1, x1 ## 1; \
	pxor RK3, x3 ## 1; \
	movdqa x0 ## 1, x4 ## 1; \
	pslld $5, x0 ## 1; \
	psrld $(32 - 5), x4 ## 1; \
	por x4 ## 1, x0 ## 1; \
	movdqa x2 ## 1, x4 ## 1; \
	pslld $22, x2 ## 1; \
	psrld $(32 - 22), x4 ## 1; \
	por x4 ## 1, x2 ## 1; \
	pxor RK0, x0 ## 1; \
	pxor RK2, x2 ## 1; \
	pxor RK1, x1 ## 2; \
	pxor RK3, x3 ## 2; \
	movdqa x0 ## 2, x4 ## 2; \
	pslld $5, x0 ## 2; \
	psrld $(32 - 5), x4 ## 2; \
	por x4 ## 2, x0 ## 2; \
	movdqa x2 ## 2, x4 ## 2; \
	pslld $22, x2 ## 2; \
	psrld $(32 - 22), x4 ## 2; \
	por x4 ## 2, x2 ## 2; \
	pxor RK0, x0 ## 2; \
	pxor RK2, x2 ## 2;

/*
 * KL2: XOR RK0..RK3 into x0..x3 and then undo the mixing done by LK2,
 * for both register groups.  Per 32-bit lane this computes
 *
 *	x0 ^= RK0; x1 ^= RK1; x2 ^= RK2; x3 ^= RK3;
 *	x0 = ror(x0, 5);	x2 = ror(x2, 22);
 *	x2 ^= x3 ^ (x1 << 7);	x0 ^= x1 ^ x3;
 *	x1 = ror(x1, 1);	x3 = ror(x3, 7);
 *	x1 ^= x0 ^ x2;		x3 ^= x2 ^ (x0 << 3);
 *	x0 = ror(x0, 13);	x2 = ror(x2, 3);
 *
 * This macro does not load any key words and does not reference its i
 * argument: RK0..RK3 must already hold the wanted key words (SP() loads
 * them).  x4 is scratch.
 */
#define KL2(x0, x1, x2, x3, x4, i) \
	pxor RK0, x0 ## 1; \
	pxor RK2, x2 ## 1; \
	movdqa x0 ## 1, x4 ## 1; \
	psrld $5, x0 ## 1; \
	pslld $(32 - 5), x4 ## 1; \
	por x4 ## 1, x0 ## 1; \
	pxor RK3, x3 ## 1; \
	pxor RK1, x1 ## 1; \
	movdqa x2 ## 1, x4 ## 1; \
	psrld $22, x2 ## 1; \
	pslld $(32 - 22), x4 ## 1; \
	por x4 ## 1, x2 ## 1; \
	pxor x3 ## 1, x2 ## 1; \
	pxor RK0, x0 ## 2; \
	pxor RK2, x2 ## 2; \
	movdqa x0 ## 2, x4 ## 2; \
	psrld $5, x0 ## 2; \
	pslld $(32 - 5), x4 ## 2; \
	por x4 ## 2, x0 ## 2; \
	pxor RK3, x3 ## 2; \
	pxor RK1, x1 ## 2; \
	movdqa x2 ## 2, x4 ## 2; \
	psrld $22, x2 ## 2; \
	pslld $(32 - 22), x4 ## 2; \
	por x4 ## 2, x2 ## 2; \
	pxor x3 ## 2, x2 ## 2; \
	pxor x3 ## 1, x0 ## 1; \
	movdqa x1 ## 1, x4 ## 1; \
	pslld $7, x4 ## 1; \
	pxor x1 ## 1, x0 ## 1; \
	pxor x4 ## 1, x2 ## 1; \
	movdqa x1 ## 1, x4 ## 1; \
	psrld $1, x1 ## 1; \
	pslld $(32 - 1), x4 ## 1; \
	por x4 ## 1, x1 ## 1; \
	pxor x3 ## 2, x0 ## 2; \
	movdqa x1 ## 2, x4 ## 2; \
	pslld $7, x4 ## 2; \
	pxor x1 ## 2, x0 ## 2; \
	pxor x4 ## 2, x2 ## 2; \
	movdqa x1 ## 2, x4 ## 2; \
	psrld $1, x1 ## 2; \
	pslld $(32 - 1), x4 ## 2; \
	por x4 ## 2, x1 ## 2; \
	movdqa x3 ## 1, x4 ## 1; \
	psrld $7, x3 ## 1; \
	pslld $(32 - 7), x4 ## 1; \
	por x4 ## 1, x3 ## 1; \
	pxor x0 ## 1, x1 ## 1; \
	movdqa x0 ## 1, x4 ## 1; \
	pslld $3, x4 ## 1; \
	pxor x4 ## 1, x3 ## 1; \
	movdqa x0 ## 1, x4 ## 1; \
	movdqa x3 ## 2, x4 ## 2; \
	psrld $7, x3 ## 2; \
	pslld $(32 - 7), x4 ## 2; \
	por x4 ## 2, x3 ## 2; \
	pxor x0 ## 2, x1 ## 2; \
	movdqa x0 ## 2, x4 ## 2; \
	pslld $3, x4 ## 2; \
	pxor x4 ## 2, x3 ## 2; \
	movdqa x0 ## 2, x4 ## 2; \
	psrld $13, x0 ## 1; \
	pslld $(32 - 13), x4 ## 1; \
	por x4 ## 1, x0 ## 1; \
	pxor x2 ## 1, x1 ## 1; \
	pxor x2 ## 1, x3 ## 1; \
	movdqa x2 ## 1, x4 ## 1; \
	psrld $3, x2 ## 1; \
	pslld $(32 - 3), x4 ## 1; \
	por x4 ## 1, x2 ## 1; \
	psrld $13, x0 ## 2; \
	pslld $(32 - 13), x4 ## 2; \
	por x4 ## 2, x0 ## 2; \
	pxor x2 ## 2, x1 ## 2; \
	pxor x2 ## 2, x3 ## 2; \
	movdqa x2 ## 2, x4 ## 2; \
	psrld $3, x2 ## 2; \
	pslld $(32 - 3), x4 ## 2; \
	por x4 ## 2, x2 ## 2;

/*
 * S: apply both halves of S-box SBOX to register group 1 and then to
 * register group 2.
 */
#define S(SBOX, x0, x1, x2, x3, x4) \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);

/*
 * SP: apply S-box SBOX to both register groups and load the four key
 * words with index i into RK0..RK3.  The S-box halves of the two groups
 * and the key loads are interleaved: _1 on group 1, _1 on group 2, _2 on
 * group 1, _2 on group 2.  The loaded key words are not applied here;
 * KL2() consumes them.
 */
#define SP(SBOX, x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 2, RK2); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	get_key(i, 3, RK3); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 1, RK1); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);

/*
 * transpose_4x4: transpose the 4x4 matrix of 32-bit words held in
 * x0..x3 (one row per register), in place.  t1 and t2 are clobbered;
 * t0 is accepted but not referenced.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	movdqa x0, t2; \
	punpckldq x1, x0; \
	punpckhdq x1, t2; \
	movdqa x2, t1; \
	punpckhdq x3, x2; \
	punpckldq x3, t1; \
	movdqa x0, x1; \
	punpcklqdq t1, x0; \
	punpckhqdq t1, x1; \
	movdqa t2, x3; \
	punpcklqdq x2, t2; \
	punpckhqdq x2, x3; \
	movdqa t2, x2;

/*
 * read_blocks: load four 16-byte vectors from in (unaligned loads at
 * byte offsets 0, 16, 32 and 48) into x0..x3 and transpose them.
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
	movdqu (0*4*4)(in), x0; \
	movdqu (1*4*4)(in), x1; \
	movdqu (2*4*4)(in), x2; \
	movdqu (3*4*4)(in), x3; \
	\
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

/*
 * write_blocks: transpose x0..x3 and store them to out (unaligned
 * stores at byte offsets 0, 16, 32 and 48).
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu x0, (0*4*4)(out); \
	movdqu x1, (1*4*4)(out); \
	movdqu x2, (2*4*4)(out); \
	movdqu x3, (3*4*4)(out);

/*
 * xor_blocks: transpose x0..x3, XOR each with the 16 bytes currently
 * stored at the matching offset in out, and store the result back to
 * out.  t0 holds the value read from memory.
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu (0*4*4)(out), t0; \
	pxor t0, x0; \
	movdqu x0, (0*4*4)(out); \
	movdqu (1*4*4)(out), t0; \
	pxor t0, x1; \
	movdqu x1, (1*4*4)(out); \
	movdqu (2*4*4)(out), t0; \
	pxor t0, x2; \
	movdqu x2, (2*4*4)(out); \
	movdqu (3*4*4)(out), t0; \
	pxor t0, x3; \
	movdqu x3, (3*4*4)(out);

SYM_FUNC_START(__serpent_enc_blk_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool, if true: xor output
	 *
	 * Reads 2 * 64 bytes from src and writes 2 * 64 bytes to dst.
	 * %rax is clobbered: it points at the second 64 bytes of src and
	 * later of dst.  Only the low byte of %rcx (%cl) is tested.
	 */

	/* RNOT = all ones, used by the S-boxes as a NOT mask */
	pcmpeqd RNOT, RNOT;

	leaq (4*4*4)(%rdx), %rax;
	read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	/*
	 * Key index 0 is mixed in first.  Then S0..S7 are applied four
	 * times over (32 S-box steps), each followed by LK2 with the next
	 * key index, except the last step, which is followed by a plain
	 * K2 with key index 32.  The register order changes every step to
	 * follow where each S-box leaves its results.
	 */
	K2(RA, RB, RC, RD, RE, 0);
	S(S0, RA, RB, RC, RD, RE);		LK2(RC, RB, RD, RA, RE, 1);
	S(S1, RC, RB, RD, RA, RE);		LK2(RE, RD, RA, RC, RB, 2);
	S(S2, RE, RD, RA, RC, RB);		LK2(RB, RD, RE, RC, RA, 3);
	S(S3, RB, RD, RE, RC, RA);		LK2(RC, RA, RD, RB, RE, 4);
	S(S4, RC, RA, RD, RB, RE);		LK2(RA, RD, RB, RE, RC, 5);
	S(S5, RA, RD, RB, RE, RC);		LK2(RC, RA, RD, RE, RB, 6);
	S(S6, RC, RA, RD, RE, RB);		LK2(RD, RB, RA, RE, RC, 7);
	S(S7, RD, RB, RA, RE, RC);		LK2(RC, RA, RE, RD, RB, 8);
	S(S0, RC, RA, RE, RD, RB);		LK2(RE, RA, RD, RC, RB, 9);
	S(S1, RE, RA, RD, RC, RB);		LK2(RB, RD, RC, RE, RA, 10);
	S(S2, RB, RD, RC, RE, RA);		LK2(RA, RD, RB, RE, RC, 11);
	S(S3, RA, RD, RB, RE, RC);		LK2(RE, RC, RD, RA, RB, 12);
	S(S4, RE, RC, RD, RA, RB);		LK2(RC, RD, RA, RB, RE, 13);
	S(S5, RC, RD, RA, RB, RE);		LK2(RE, RC, RD, RB, RA, 14);
	S(S6, RE, RC, RD, RB, RA);		LK2(RD, RA, RC, RB, RE, 15);
	S(S7, RD, RA, RC, RB, RE);		LK2(RE, RC, RB, RD, RA, 16);
	S(S0, RE, RC, RB, RD, RA);		LK2(RB, RC, RD, RE, RA, 17);
	S(S1, RB, RC, RD, RE, RA);		LK2(RA, RD, RE, RB, RC, 18);
	S(S2, RA, RD, RE, RB, RC);		LK2(RC, RD, RA, RB, RE, 19);
	S(S3, RC, RD, RA, RB, RE);		LK2(RB, RE, RD, RC, RA, 20);
	S(S4, RB, RE, RD, RC, RA);		LK2(RE, RD, RC, RA, RB, 21);
	S(S5, RE, RD, RC, RA, RB);		LK2(RB, RE, RD, RA, RC, 22);
	S(S6, RB, RE, RD, RA, RC);		LK2(RD, RC, RE, RA, RB, 23);
	S(S7, RD, RC, RE, RA, RB);		LK2(RB, RE, RA, RD, RC, 24);
	S(S0, RB, RE, RA, RD, RC);		LK2(RA, RE, RD, RB, RC, 25);
	S(S1, RA, RE, RD, RB, RC);		LK2(RC, RD, RB, RA, RE, 26);
	S(S2, RC, RD, RB, RA, RE);		LK2(RE, RD, RC, RA, RB, 27);
	S(S3, RE, RD, RC, RA, RB);		LK2(RA, RB, RD, RE, RC, 28);
	S(S4, RA, RB, RD, RE, RC);		LK2(RB, RD, RE, RC, RA, 29);
	S(S5, RB, RD, RE, RC, RA);		LK2(RA, RB, RD, RC, RE, 30);
	S(S6, RA, RB, RD, RC, RE);		LK2(RD, RE, RB, RC, RA, 31);
	S(S7, RD, RE, RB, RC, RA);		K2(RA, RB, RC, RD, RE, 32);

	leaq (4*4*4)(%rsi), %rax;

	/* non-zero %cl: XOR the result into dst instead of overwriting it */
	testb %cl, %cl;
	jnz .L__enc_xor8;

	write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	RET;

.L__enc_xor8:
	xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	RET;
SYM_FUNC_END(__serpent_enc_blk_8way)

SYM_FUNC_START(serpent_dec_blk_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	pcmpeqd RNOT, RNOT;

	leaq (4*4*4)(%rdx), %rax;
	read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	K2(RA, RB, RC, RD, RE, 32);
	SP(SI7, RA, RB, RC, RD, RE, 31);	KL2(RB, RD, RA, RE, RC, 31);
	SP(SI6, RB, RD, RA, RE, RC, 30);	KL2(RA, RC, RE, RB, RD, 30);
	SP(SI5, RA, RC, RE, RB, RD, 29);	KL2(RC, RD, RA, RE, RB, 29);
	SP(SI4, RC, RD, RA, RE, RB, 28);	KL2(RC, RA, RB, RE, RD, 28);
	SP(SI3, RC, RA, RB, RE, RD, 27);	KL2(RB, RC, RD, RE, RA, 27);
	SP(SI2, RB, RC, RD, RE, RA, 26);	KL2(RC, RA, RE, RD, RB, 26);
	SP(SI1, RC, RA, RE, RD, RB, 25);	KL2(RB, RA, RE, RD, RC, 25);
	SP(SI0, RB, RA, RE, RD, RC, 24);	KL2(RE, RC, RA, RB, RD, 24);
	SP(SI7, RE, RC, RA, RB, RD, 23);	KL2(RC, RB, RE, RD, RA, 23);
	SP(SI6, RC, RB, RE, RD, RA, 22);	KL2(RE, RA, RD, RC, RB, 22);
	SP(SI5, RE, RA, RD, RC, RB, 21);	KL2(RA, RB, RE, RD, RC, 21);
	SP(SI4, RA, RB, RE, RD, RC, 20);	KL2(RA, RE, RC, RD, RB, 20);
	SP(SI3, RA, RE, RC, RD, RB, 19);	KL2(RC, RA, RB, RD, RE, 19);
	SP(SI2, RC, RA, RB, RD, RE, 18);	KL2(RA, RE, RD, RB, RC, 18);
	SP(SI1, RA, RE, RD, RB, RC, 17);	KL2(RC, RE, RD, RB, RA, 17);
	SP(SI0, RC, RE, RD, RB, RA, 16);	KL2(RD, RA, RE, RC, RB, 16);
	SP(SI7, RD, RA, RE, RC, RB, 15);	KL2(RA, RC, RD, RB, RE, 15);
	SP(SI6, RA, RC, RD, RB, RE, 14);	KL2(RD, RE, RB, RA, RC, 14);
	SP(SI5, RD, RE, RB, RA, RC, 13);	KL2(RE, RC, RD, RB, RA, 13);
	SP(SI4, RE, RC, RD, RB, RA, 12);	KL2(RE, RD, RA, RB, RC, 12);
	SP(SI3, RE, RD, RA, RB, RC, 11);	KL2(RA, RE, RC, RB, RD, 11);
	SP(SI2, RA, RE, RC, RB, RD, 10);	KL2(RE, RD, RB, RC, RA, 10);
	SP(SI1, RE, RD, RB, RC, RA, 9);		KL2(RA, RD, RB, RC, RE, 9);
	SP(SI0, RA, RD,
RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); 72562306a36Sopenharmony_ci SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); 72662306a36Sopenharmony_ci SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); 72762306a36Sopenharmony_ci SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); 72862306a36Sopenharmony_ci SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); 72962306a36Sopenharmony_ci SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); 73062306a36Sopenharmony_ci SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); 73162306a36Sopenharmony_ci SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); 73262306a36Sopenharmony_ci S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci leaq (4*4*4)(%rsi), %rax; 73562306a36Sopenharmony_ci write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); 73662306a36Sopenharmony_ci write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci RET; 73962306a36Sopenharmony_ciSYM_FUNC_END(serpent_dec_blk_8way) 740