/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
/*
 * AES CTR mode by8 optimization with AVX instructions. (x86_64)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * Contact Information:
 * James Guilford <james.guilford@intel.com>
 * Sean Gulley <sean.m.gulley@intel.com>
 * Chandramouli Narayanan <mouli@linux.intel.com>
 */
/*
 * This is AES128/192/256 CTR mode optimization implementation. It requires
 * the support of Intel(R) AESNI and AVX instructions.
 *
 * This work was inspired by the AES CTR mode optimization published
 * in Intel Optimized IPSEC Cryptographic library.
 * Additional information on it can be found at:
 *    https://github.com/intel/intel-ipsec-mb
 */

#include <linux/linkage.h>

/* All input/output block accesses go through the unaligned move. */
#define VMOVDQ		vmovdqu

/*
 * Note: the "x" prefix in these aliases means "this is an xmm register".  The
 * alias prefixes have no relation to XCTR where the "X" prefix means "XOR
 * counter".
 *
 * xdata0..xdata7 hold the (up to eight) blocks being processed in parallel.
 * xcounter/xiv and xbyteswap/xtmp share a register because each pair is only
 * used in one of the two modes.
 * xkey0/xkey4/xkey8/xkey12 cache round keys across iterations of the main
 * loop; xkeyA/xkeyB are scratch registers for the remaining round keys and,
 * at the end of do_aes, for the input blocks.
 */
#define xdata0		%xmm0
#define xdata1		%xmm1
#define xdata2		%xmm2
#define xdata3		%xmm3
#define xdata4		%xmm4
#define xdata5		%xmm5
#define xdata6		%xmm6
#define xdata7		%xmm7
#define xcounter	%xmm8	// CTR mode only
#define xiv		%xmm8	// XCTR mode only
#define xbyteswap	%xmm9	// CTR mode only
#define xtmp		%xmm9	// XCTR mode only
#define xkey0		%xmm10
#define xkey4		%xmm11
#define xkey8		%xmm12
#define xkey12		%xmm13
#define xkeyA		%xmm14
#define xkeyB		%xmm15

/* Argument registers, in the order used by the entry points of this file. */
#define p_in		%rdi
#define p_iv		%rsi
#define p_keys		%rdx
#define p_out		%rcx
#define num_bytes	%r8
#define counter		%r9	// XCTR mode only
#define tmp		%r10
/* DDQ_DATA is not referenced by any macro in this file; only XDATA is. */
#define	DDQ_DATA	0
#define	XDATA		1
/* Values accepted for the key_len macro parameter. */
#define KEY_128		1
#define KEY_192		2
#define KEY_256		3

.section .rodata
.align 16

/* vpshufb mask that reverses the byte order of a 16-byte value. */
byteswap_const:
	.octa 0x000102030405060708090A0B0C0D0E0F
/* All-ones low qword: used with vptest to check whether the low qword is 0. */
ddq_low_msk:
	.octa 0x0000000000000000FFFFFFFFFFFFFFFF
/* Adds 1 to the high qword: the carry out of the low qword. */
ddq_high_add_1:
	.octa 0x00000000000000010000000000000000
/*
 * ddq_add_1 .. ddq_add_8 form a table that do_aes indexes as
 * (ddq_add_1 + 16 * k), i.e. entry k adds k + 1.  The entries must therefore
 * stay contiguous and in ascending order.
 */
ddq_add_1:
	.octa 0x00000000000000000000000000000001
ddq_add_2:
	.octa 0x00000000000000000000000000000002
ddq_add_3:
	.octa 0x00000000000000000000000000000003
ddq_add_4:
	.octa 0x00000000000000000000000000000004
ddq_add_5:
	.octa 0x00000000000000000000000000000005
ddq_add_6:
	.octa 0x00000000000000000000000000000006
ddq_add_7:
	.octa 0x00000000000000000000000000000007
ddq_add_8:
	.octa 0x00000000000000000000000000000008

.text

/* Bind the assembler symbol var_xdata to the register %xmm<n>. */
.macro setxdata n
	var_xdata = %xmm\n
.endm

/*
 * club the numeric 'id' to the symbol 'name'
 *
 * Only name == XDATA has an effect: var_xdata is set to %xmm<id>.  The
 * .altmacro '%' operator evaluates the expression 'id' (typically the loop
 * symbol i) to a number before it is handed to setxdata.
 */

.macro club name, id
.altmacro
	.if \name == XDATA
		setxdata %\id
	.endif
.noaltmacro
.endm

/*
 * do_aes num_in_par load_keys key_len xctr
 * This increments p_in, but not p_out
 *
 * Encrypts 'b' counter blocks in parallel in xdata0..xdata<b-1>, XORs them
 * with 'b' input blocks read through p_in and stores the result at p_out.
 *
 *   b        number of blocks handled by this expansion (the callers in this
 *            file pass 1..8)
 *   k        non-zero: load the cached round keys xkey0/xkey4/xkey8/xkey12
 *            from p_keys; zero: use whatever those registers already hold
 *   key_len  KEY_128, KEY_192 or KEY_256; selects 10, 12 or 14 rounds
 *   xctr     non-zero: XCTR counter blocks (xiv XOR block index);
 *            zero: CTR counter blocks derived from xcounter
 *
 * The cached registers hold round keys 3/6/9 for KEY_128 and 4/8/12 for the
 * longer key sizes; all other round keys are reloaded into xkeyA/xkeyB on
 * every expansion.  Round-key loads are interleaved with the vaesenc rounds,
 * so the statement order below is significant (presumably to overlap load
 * latency with the AES rounds -- not verifiable from this file alone).
 */
.macro do_aes b, k, key_len, xctr
	.set by, \b
	.set load_keys, \k
	.set klen, \key_len

	.if (load_keys)
		vmovdqa	0*16(p_keys), xkey0
	.endif

	.if \xctr
		/* XCTR: block i is xiv XOR (counter + i + 1) */
		movq counter, xtmp
		.set i, 0
		.rept (by)
			club XDATA, i
			vpaddq	(ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata
			.set i, (i +1)
		.endr
		.set i, 0
		.rept (by)
			club	XDATA, i
			vpxor	xiv, var_xdata, var_xdata
			.set i, (i +1)
		.endr
	.else
		/*
		 * CTR: block 0 is the current counter, block i is counter + i.
		 * vpaddq only adds within the low qword.  If the low qword of
		 * a block ends up as zero it has wrapped, so the carry is
		 * added to the high qword of that block and of xcounter; the
		 * later blocks then inherit the carry from xcounter.
		 * Every block is shuffled with xbyteswap before encryption.
		 */
		vpshufb	xbyteswap, xcounter, xdata0
		.set i, 1
		.rept (by - 1)
			club XDATA, i
			vpaddq	(ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
			vptest	ddq_low_msk(%rip), var_xdata
			jnz 1f
			vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
			vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
			1:
			vpshufb	xbyteswap, var_xdata, var_xdata
			.set i, (i +1)
		.endr
	.endif

	vmovdqa	1*16(p_keys), xkeyA

	/* round 0 (key whitening) for block 0 */
	vpxor	xkey0, xdata0, xdata0
	.if \xctr
		/* advance the XCTR block counter past the blocks just built */
		add $by, counter
	.else
		/* advance xcounter by 'by', with the same carry handling */
		vpaddq	(ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
		vptest	ddq_low_msk(%rip), xcounter
		jnz	1f
		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
		1:
	.endif

	/* round 0 (key whitening) for the remaining blocks */
	.set i, 1
	.rept (by - 1)
		club XDATA, i
		vpxor	xkey0, var_xdata, var_xdata
		.set i, (i +1)
	.endr

	vmovdqa	2*16(p_keys), xkeyB

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyA, var_xdata, var_xdata		/* key 1 */
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	3*16(p_keys), xkey4
		.endif
	.else
		vmovdqa	3*16(p_keys), xkeyA
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyB, var_xdata, var_xdata		/* key 2 */
		.set i, (i +1)
	.endr

	/* p_in now points past the input blocks; they are read back below */
	add	$(16*by), p_in

	.if (klen == KEY_128)
		vmovdqa	4*16(p_keys), xkeyB
	.else
		.if (load_keys)
			vmovdqa	4*16(p_keys), xkey4
		.endif
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		/* key 3 */
		.if (klen == KEY_128)
			vaesenc	xkey4, var_xdata, var_xdata
		.else
			vaesenc	xkeyA, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	vmovdqa	5*16(p_keys), xkeyA

	.set i, 0
	.rept by
		club XDATA, i
		/* key 4 */
		.if (klen == KEY_128)
			vaesenc	xkeyB, var_xdata, var_xdata
		.else
			vaesenc	xkey4, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	6*16(p_keys), xkey8
		.endif
	.else
		vmovdqa	6*16(p_keys), xkeyB
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyA, var_xdata, var_xdata		/* key 5 */
		.set i, (i +1)
	.endr

	vmovdqa	7*16(p_keys), xkeyA

	.set i, 0
	.rept by
		club XDATA, i
		/* key 6 */
		.if (klen == KEY_128)
			vaesenc	xkey8, var_xdata, var_xdata
		.else
			vaesenc	xkeyB, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		vmovdqa	8*16(p_keys), xkeyB
	.else
		.if (load_keys)
			vmovdqa	8*16(p_keys), xkey8
		.endif
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyA, var_xdata, var_xdata		/* key 7 */
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	9*16(p_keys), xkey12
		.endif
	.else
		vmovdqa	9*16(p_keys), xkeyA
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		/* key 8 */
		.if (klen == KEY_128)
			vaesenc	xkeyB, var_xdata, var_xdata
		.else
			vaesenc	xkey8, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	vmovdqa	10*16(p_keys), xkeyB

	.set i, 0
	.rept by
		club XDATA, i
		/* key 9 */
		.if (klen == KEY_128)
			vaesenc	xkey12, var_xdata, var_xdata
		.else
			vaesenc	xkeyA, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen != KEY_128)
		vmovdqa	11*16(p_keys), xkeyA
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		/* key 10: final round for KEY_128 */
		.if (klen == KEY_128)
			vaesenclast	xkeyB, var_xdata, var_xdata
		.else
			vaesenc	xkeyB, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen != KEY_128)
		.if (load_keys)
			vmovdqa	12*16(p_keys), xkey12
		.endif

		.set i, 0
		.rept by
			club XDATA, i
			vaesenc	xkeyA, var_xdata, var_xdata	/* key 11 */
			.set i, (i +1)
		.endr

		.if (klen == KEY_256)
			vmovdqa	13*16(p_keys), xkeyA
		.endif

		.set i, 0
		.rept by
			club XDATA, i
			.if (klen == KEY_256)
				/* key 12 */
				vaesenc	xkey12, var_xdata, var_xdata
			.else
				/* key 12: final round for KEY_192 */
				vaesenclast xkey12, var_xdata, var_xdata
			.endif
			.set i, (i +1)
		.endr

		.if (klen == KEY_256)
			vmovdqa	14*16(p_keys), xkeyB

			.set i, 0
			.rept by
				club XDATA, i
				/* key 13 */
				vaesenc	xkeyA, var_xdata, var_xdata
				.set i, (i +1)
			.endr

			.set i, 0
			.rept by
				club XDATA, i
				/* key 14: final round for KEY_256 */
				vaesenclast	xkeyB, var_xdata, var_xdata
				.set i, (i +1)
			.endr
		.endif
	.endif

	/*
	 * XOR the keystream with the input, two blocks at a time.  p_in was
	 * already advanced by 16*by, hence the negative offsets.  xkeyA and
	 * xkeyB are free at this point and serve as scratch registers.
	 */
	.set i, 0
	.rept (by / 2)
		.set j, (i+1)
		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
		VMOVDQ	(j*16 - 16*by)(p_in), xkeyB
		club XDATA, i
		vpxor	xkeyA, var_xdata, var_xdata
		club XDATA, j
		vpxor	xkeyB, var_xdata, var_xdata
		.set i, (i+2)
	.endr

	/* odd 'by': one block is left over after the pairs */
	.if (i < by)
		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
		club XDATA, i
		vpxor	xkeyA, var_xdata, var_xdata
	.endif

	/* write the result blocks; the caller advances p_out */
	.set i, 0
	.rept by
		club XDATA, i
		VMOVDQ	var_xdata, i*16(p_out)
		.set i, (i+1)
	.endr
.endm

/* do_aes for 'val' blocks, (re)loading the cached round keys first. */
.macro do_aes_load val, key_len, xctr
	do_aes \val, 1, \key_len, \xctr
.endm

/* do_aes for 'val' blocks, relying on the cached round keys being loaded. */
.macro do_aes_noload val, key_len, xctr
	do_aes \val, 0, \key_len, \xctr
.endm

/*
 * main body of aes ctr load
 *
 * do_aes_ctrmain key_len xctr
 *
 * Expands to the complete body of one entry point, including the RET.
 *
 *   key_len  KEY_128, KEY_192 or KEY_256
 *   xctr     non-zero for XCTR mode, zero for CTR mode
 *
 * Flow:
 *   - fewer than 16 bytes: return at once, leaving the IV untouched
 *   - (num_bytes / 16) % 8 leftover blocks are processed first by one
 *     do_aes_load expansion, which also fills the cached round keys
 *   - the remaining multiple of 8 blocks is processed by the main loop
 *   - CTR mode only: the updated counter is written back to (p_iv)
 *
 * NOTE(review): the leftover paths round num_bytes down to a multiple of 128
 * with "and $(~7*16)", but the .Lmult_of_8_blks path does not mask the low
 * four bits, so the main loop only terminates there if num_bytes is a
 * multiple of 16.  Callers appear to be expected to pass whole blocks --
 * confirm against the glue code.
 */

.macro do_aes_ctrmain key_len, xctr
	cmp	$16, num_bytes
	/*
	 * Nothing to do.  Skip the IV write-back as well: xcounter and
	 * xbyteswap have not been loaded yet, so storing xcounter here would
	 * overwrite the caller's IV with stale register contents.
	 */
	jb	.Ldo_return_nostore\xctr\key_len

	.if \xctr
		/* byte counter -> block counter */
		shr	$4, counter
		vmovdqu	(p_iv), xiv
	.else
		/* keep the counter byte-swapped so vpaddq can increment it */
		vmovdqa	byteswap_const(%rip), xbyteswap
		vmovdqu	(p_iv), xcounter
		vpshufb	xbyteswap, xcounter, xcounter
	.endif

	mov	num_bytes, tmp
	and	$(7*16), tmp
	jz	.Lmult_of_8_blks\xctr\key_len

	/* tmp = 16 * (number of leftover blocks), 1 <= leftover blocks <= 7 */
	cmp	$(4*16), tmp
	jg	.Lgt4\xctr\key_len
	je	.Leq4\xctr\key_len

.Llt4\xctr\key_len:
	cmp	$(2*16), tmp
	jg	.Leq3\xctr\key_len
	je	.Leq2\xctr\key_len

.Leq1\xctr\key_len:
	do_aes_load	1, \key_len, \xctr
	add	$(1*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq2\xctr\key_len:
	do_aes_load	2, \key_len, \xctr
	add	$(2*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len


.Leq3\xctr\key_len:
	do_aes_load	3, \key_len, \xctr
	add	$(3*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq4\xctr\key_len:
	do_aes_load	4, \key_len, \xctr
	add	$(4*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Lgt4\xctr\key_len:
	cmp	$(6*16), tmp
	jg	.Leq7\xctr\key_len
	je	.Leq6\xctr\key_len

.Leq5\xctr\key_len:
	do_aes_load	5, \key_len, \xctr
	add	$(5*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq6\xctr\key_len:
	do_aes_load	6, \key_len, \xctr
	add	$(6*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq7\xctr\key_len:
	do_aes_load	7, \key_len, \xctr
	add	$(7*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Lmult_of_8_blks\xctr\key_len:
	/*
	 * No leftover blocks, so no do_aes_load has run: fill the cached
	 * round keys here (rounds 4/8/12, or 3/6/9 for KEY_128).
	 */
	.if (\key_len != KEY_128)
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	4*16(p_keys), xkey4
		vmovdqa	8*16(p_keys), xkey8
		vmovdqa	12*16(p_keys), xkey12
	.else
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	3*16(p_keys), xkey4
		vmovdqa	6*16(p_keys), xkey8
		vmovdqa	9*16(p_keys), xkey12
	.endif
.align 16
.Lmain_loop2\xctr\key_len:
	/* num_bytes is a non-zero multiple of 8 blocks (8*16 bytes) */
	do_aes_noload	8, \key_len, \xctr
	add	$(8*16), p_out
	sub	$(8*16), num_bytes
	jne	.Lmain_loop2\xctr\key_len

.Ldo_return2\xctr\key_len:
	.if !\xctr
		/* return updated IV */
		vpshufb	xbyteswap, xcounter, xcounter
		vmovdqu	xcounter, (p_iv)
	.endif
.Ldo_return_nostore\xctr\key_len:
	RET
.endm

/*
 * Entry points.
 *
 * NOTE(review): the prototypes below declare num_bytes (and byte_ctr) as
 * unsigned int, while the code operates on the full 64-bit %r8 (and %r9).
 * This relies on the caller passing the values zero-extended -- confirm.
 */

/*
 * routine to do AES128 CTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 */
SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_128 0

SYM_FUNC_END(aes_ctr_enc_128_avx_by8)

/*
 * routine to do AES192 CTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 */
SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_192 0

SYM_FUNC_END(aes_ctr_enc_192_avx_by8)

/*
 * routine to do AES256 CTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 */
SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_256 0

SYM_FUNC_END(aes_ctr_enc_256_avx_by8)

/*
 * routine to do AES128 XCTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys,
 *	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
 */
SYM_FUNC_START(aes_xctr_enc_128_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_128 1

SYM_FUNC_END(aes_xctr_enc_128_avx_by8)

/*
 * routine to do AES192 XCTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys,
 *	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
 */
SYM_FUNC_START(aes_xctr_enc_192_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_192 1

SYM_FUNC_END(aes_xctr_enc_192_avx_by8)

/*
 * routine to do AES256 XCTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys,
 *	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
 */
SYM_FUNC_START(aes_xctr_enc_256_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_256 1

SYM_FUNC_END(aes_xctr_enc_256_avx_by8)