/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Shared glue code for 128bit block ciphers, AVX assembler macros
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

/*
 * All macros in this file are written in AT&T operand order (source
 * operands first, destination last) and expand to a straight-line
 * sequence of AVX instructions terminated by ';'.
 */

/*
 * load_8way - load eight consecutive 16-byte blocks.
 *
 * src:    base address register of the input
 * x0..x7: destination vector registers; xN receives the 16 bytes at
 *         offset N*16 from src
 *
 * Uses vmovdqu, so src does not need to be 16-byte aligned.
 */
#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
	vmovdqu (0*16)(src), x0; \
	vmovdqu (1*16)(src), x1; \
	vmovdqu (2*16)(src), x2; \
	vmovdqu (3*16)(src), x3; \
	vmovdqu (4*16)(src), x4; \
	vmovdqu (5*16)(src), x5; \
	vmovdqu (6*16)(src), x6; \
	vmovdqu (7*16)(src), x7;

/*
 * store_8way - store eight 16-byte blocks to consecutive memory.
 *
 * dst:    base address register of the output
 * x0..x7: source vector registers; xN is written to offset N*16 from dst
 *
 * Uses vmovdqu, so dst does not need to be 16-byte aligned.
 */
#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	vmovdqu x0, (0*16)(dst); \
	vmovdqu x1, (1*16)(dst); \
	vmovdqu x2, (2*16)(dst); \
	vmovdqu x3, (3*16)(dst); \
	vmovdqu x4, (4*16)(dst); \
	vmovdqu x5, (5*16)(dst); \
	vmovdqu x6, (6*16)(dst); \
	vmovdqu x7, (7*16)(dst);

/*
 * store_cbc_8way - chain-XOR eight blocks with the input and store them.
 *
 * src:    base address register of the original input blocks
 * dst:    base address register of the output
 * x0..x7: vector registers holding the processed blocks; x1..x7 are
 *         modified in place
 *
 * Block N (for N = 1..7) is XORed with the input block at offset
 * (N-1)*16 from src, i.e. with the preceding input block.  x0 is stored
 * unchanged.
 * NOTE(review): presumably the caller XORs the first block with the
 * chaining IV (CBC decryption) - confirm against the callers.
 *
 * All XORs read src before anything is written to dst.
 */
#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	vpxor (0*16)(src), x1, x1; \
	vpxor (1*16)(src), x2, x2; \
	vpxor (2*16)(src), x3, x3; \
	vpxor (3*16)(src), x4, x4; \
	vpxor (4*16)(src), x5, x5; \
	vpxor (5*16)(src), x6, x6; \
	vpxor (6*16)(src), x7, x7; \
	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);

/*
 * inc_le128 - add one to a 128-bit value held as two 64-bit lanes.
 *
 * x:         vector register holding the value (low qword = low 64 bits);
 *            updated in place
 * minus_one: vector register whose low qword is -1 and high qword is 0
 * tmp:       scratch vector register, clobbered
 *
 * vpcmpeqq sets the low qword of tmp to all-ones when the low qword of x
 * is -1, i.e. when the increment is about to wrap.  Subtracting minus_one
 * adds 1 to the low qword and 0 to the high qword.  The byte shift then
 * moves the wrap flag into the high qword (and clears the low qword), and
 * subtracting it (-1) adds the carry to the high qword.
 */
#define inc_le128(x, minus_one, tmp) \
	vpcmpeqq minus_one, x, tmp; \
	vpsubq minus_one, x, x; \
	vpslldq $8, tmp, tmp; \
	vpsubq tmp, x, x;

/*
 * load_ctr_8way - generate eight successive counter blocks.
 *
 * iv:         address register pointing at the 16-byte counter; read at
 *             the start and written back at the end
 * bswap:      operand holding the vpshufb byte-shuffle mask; loaded with
 *             vmovdqa, so a memory operand must be 16-byte aligned
 * x0..x7:     output vector registers
 * t0, t1, t2: scratch vector registers, all clobbered
 *
 * x0 receives the shuffled counter as loaded; x1..x7 receive the shuffled
 * counter after 1..7 increments.  The counter is then incremented once
 * more (eight increments in total) and stored back to (iv) unshuffled.
 * NOTE(review): judging by the parameter name, the mask byte-swaps the
 * counter into the cipher's block byte order - confirm at the definition
 * of the mask.
 */
#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
	vpcmpeqd t0, t0, t0; \
	vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
	vmovdqa bswap, t1; \
	\
	/* load IV and byteswap */ \
	vmovdqu (iv), x7; \
	vpshufb t1, x7, x0; \
	\
	/* construct IVs */ \
	inc_le128(x7, t0, t2); \
	vpshufb t1, x7, x1; \
	inc_le128(x7, t0, t2); \
	vpshufb t1, x7, x2; \
	inc_le128(x7, t0, t2); \
	vpshufb t1, x7, x3; \
	inc_le128(x7, t0, t2); \
	vpshufb t1, x7, x4; \
	inc_le128(x7, t0, t2); \
	vpshufb t1, x7, x5; \
	inc_le128(x7, t0, t2); \
	vpshufb t1, x7, x6; \
	inc_le128(x7, t0, t2); \
	vmovdqa x7, t2; \
	vpshufb t1, x7, x7; \
	inc_le128(t2, t0, t1); \
	vmovdqu t2, (iv);

/*
 * store_ctr_8way - XOR eight blocks with the input and store them.
 *
 * src:    base address register of the input blocks
 * dst:    base address register of the output
 * x0..x7: vector registers; xN is XORed in place with the 16 bytes at
 *         offset N*16 from src and then written to offset N*16 from dst
 *
 * All XORs read src before anything is written to dst.
 */
#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	vpxor (0*16)(src), x0, x0; \
	vpxor (1*16)(src), x1, x1; \
	vpxor (2*16)(src), x2, x2; \
	vpxor (3*16)(src), x3, x3; \
	vpxor (4*16)(src), x4, x4; \
	vpxor (5*16)(src), x5, x5; \
	vpxor (6*16)(src), x6, x6; \
	vpxor (7*16)(src), x7, x7; \
	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);

/*
 * gf128mul_x_ble - shift a 128-bit value left by one bit with masked
 * feedback.
 *
 * iv:   vector register holding the value; updated in place
 * mask: vector register ANDed with the feedback bits
 * tmp:  scratch vector register, clobbered
 *
 * Steps:
 *  - vpsrad $31 turns each 32-bit lane of tmp into all-ones or zero
 *    according to the top bit of the matching lane of iv.
 *  - vpaddq doubles each 64-bit lane of iv separately, so the bit shifted
 *    out of each lane is dropped at this point.
 *  - vpshufd $0x13 moves the flag of dword 3 (bit 127 of the old value)
 *    into dword 0 and the flag of dword 1 (bit 63) into dword 2; dwords 1
 *    and 3 receive the flag of dword 0.
 *  - vpand keeps only the bits selected by mask, and vpxor folds them
 *    into iv.
 * NOTE(review): going by the macro name this is multiplication by x in
 * GF(2^128); that requires mask to hold the reduction constant in dword 0,
 * a single carry bit in dword 2 and zero in dwords 1 and 3 - confirm
 * where the mask is defined.
 */
#define gf128mul_x_ble(iv, mask, tmp) \
	vpsrad $31, iv, tmp; \
	vpaddq iv, iv, iv; \
	vpshufd $0x13, tmp, tmp; \
	vpand mask, tmp, tmp; \
	vpxor tmp, iv, iv;

/*
 * load_xts_8way - load eight blocks XORed with successive tweak values.
 *
 * iv:     address register pointing at the 16-byte tweak; read at the
 *         start and written back at the end
 * src:    base address register of the input blocks
 * dst:    base address register of the output; used here as scratch
 *         storage for the eight tweak values
 * x0..x7: output vector registers; xN = input block N XOR tweak N
 * tiv:    vector register holding the running tweak, clobbered
 * t0, t1: scratch vector registers, clobbered
 * xts_gf128mul_and_shl1_mask:
 *         operand holding the mask passed to gf128mul_x_ble; loaded with
 *         vmovdqa, so a memory operand must be 16-byte aligned
 *
 * Tweak 0 is the value loaded from (iv); each following tweak is the
 * previous one passed through gf128mul_x_ble.  Tweak N is stored at
 * offset N*16 from dst.  After the eighth block the tweak is advanced
 * once more and stored back to (iv).
 *
 * Input block N is read before tweak N is written to dst, and input
 * block N+1 is read after that write.
 */
#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
		      t1, xts_gf128mul_and_shl1_mask) \
	vmovdqa xts_gf128mul_and_shl1_mask, t0; \
	\
	/* load IV */ \
	vmovdqu (iv), tiv; \
	vpxor (0*16)(src), tiv, x0; \
	vmovdqu tiv, (0*16)(dst); \
	\
	/* construct and store IVs, also xor with source */ \
	gf128mul_x_ble(tiv, t0, t1); \
	vpxor (1*16)(src), tiv, x1; \
	vmovdqu tiv, (1*16)(dst); \
	\
	gf128mul_x_ble(tiv, t0, t1); \
	vpxor (2*16)(src), tiv, x2; \
	vmovdqu tiv, (2*16)(dst); \
	\
	gf128mul_x_ble(tiv, t0, t1); \
	vpxor (3*16)(src), tiv, x3; \
	vmovdqu tiv, (3*16)(dst); \
	\
	gf128mul_x_ble(tiv, t0, t1); \
	vpxor (4*16)(src), tiv, x4; \
	vmovdqu tiv, (4*16)(dst); \
	\
	gf128mul_x_ble(tiv, t0, t1); \
	vpxor (5*16)(src), tiv, x5; \
	vmovdqu tiv, (5*16)(dst); \
	\
	gf128mul_x_ble(tiv, t0, t1); \
	vpxor (6*16)(src), tiv, x6; \
	vmovdqu tiv, (6*16)(dst); \
	\
	gf128mul_x_ble(tiv, t0, t1); \
	vpxor (7*16)(src), tiv, x7; \
	vmovdqu tiv, (7*16)(dst); \
	\
	gf128mul_x_ble(tiv, t0, t1); \
	vmovdqu tiv, (iv);

/*
 * store_xts_8way - XOR eight blocks with the values held in dst and store
 * the results to dst.
 *
 * dst:    base address register; on entry offset N*16 holds the value to
 *         XOR into block N, on exit it holds the result
 * x0..x7: vector registers holding the processed blocks; modified in place
 *
 * NOTE(review): the values read from dst are presumably the tweaks that
 * load_xts_8way stored there - confirm against the callers.
 */
#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	vpxor (0*16)(dst), x0, x0; \
	vpxor (1*16)(dst), x1, x1; \
	vpxor (2*16)(dst), x2, x2; \
	vpxor (3*16)(dst), x3, x3; \
	vpxor (4*16)(dst), x4, x4; \
	vpxor (5*16)(dst), x5, x5; \
	vpxor (6*16)(dst), x6, x6; \
	vpxor (7*16)(dst), x7, x7; \
	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);