/*
 *	Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
 *
 * This is AES128/192/256 CTR mode optimization implementation. It requires
 * the support of Intel(R) AESNI and AVX instructions.
 *
 * This work was inspired by the AES CTR mode optimization published
 * in Intel Optimized IPSEC Cryptographic library.
 * Additional information on it can be found at:
 *    http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * James Guilford <james.guilford@intel.com>
 * Sean Gulley <sean.m.gulley@intel.com>
 * Chandramouli Narayanan <mouli@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

/* Unaligned 128-bit move used for the caller's in/out buffers. */
#define VMOVDQ		vmovdqu

/* Up to eight counter blocks are encrypted in parallel in xmm0..xmm7. */
#define xdata0		%xmm0
#define xdata1		%xmm1
#define xdata2		%xmm2
#define xdata3		%xmm3
#define xdata4		%xmm4
#define xdata5		%xmm5
#define xdata6		%xmm6
#define xdata7		%xmm7
/* Running counter, kept byte-swapped (see do_aes_ctrmain). */
#define xcounter	%xmm8
#define xbyteswap	%xmm9
/*
 * Round keys that stay resident across main-loop iterations:
 * rounds 0/3/6/9 for AES-128, rounds 0/4/8/12 for AES-192/256.
 */
#define xkey0		%xmm10
#define xkey4		%xmm11
#define xkey8		%xmm12
#define xkey12		%xmm13
/* Scratch registers for the remaining round keys and for input data. */
#define xkeyA		%xmm14
#define xkeyB		%xmm15

/* Argument registers of the aes_ctr_enc_*_avx_by8() entry points. */
#define p_in		%rdi
#define p_iv		%rsi
#define p_keys		%rdx
#define p_out		%rcx
#define num_bytes	%r8

#define tmp		%r10
#define	DDQ_DATA	0
#define	XDATA		1
#define KEY_128		1
#define KEY_192		2
#define KEY_256		3

.section .rodata
.align 16

byteswap_const:
	.octa 0x000102030405060708090A0B0C0D0E0F
ddq_low_msk:
	.octa 0x0000000000000000FFFFFFFFFFFFFFFF
ddq_high_add_1:
	.octa 0x00000000000000010000000000000000
/*
 * ddq_add_1 .. ddq_add_8 must stay contiguous and in order: do_aes
 * addresses them as (ddq_add_1 + 16 * (n - 1)).
 */
ddq_add_1:
	.octa 0x00000000000000000000000000000001
ddq_add_2:
	.octa 0x00000000000000000000000000000002
ddq_add_3:
	.octa 0x00000000000000000000000000000003
ddq_add_4:
	.octa 0x00000000000000000000000000000004
ddq_add_5:
	.octa 0x00000000000000000000000000000005
ddq_add_6:
	.octa 0x00000000000000000000000000000006
ddq_add_7:
	.octa 0x00000000000000000000000000000007
ddq_add_8:
	.octa 0x00000000000000000000000000000008

.text

/* generate a unique variable for xmm register */
.macro setxdata n
	var_xdata = %xmm\n
.endm

/*
 * club the numeric 'id' to the symbol 'name'
 *
 * With name == XDATA this points var_xdata at %xmm<id>; .altmacro is
 * needed so that "%\id" is evaluated to a number before being passed on.
 */
.macro club name, id
.altmacro
	.if \name == XDATA
		setxdata %\id
	.endif
.noaltmacro
.endm

/*
 * do_aes num_in_par load_keys key_len
 * This increments p_in, but not p_out
 *
 * Encrypts 'b' consecutive counter values, XORs the result with 'b'
 * 16-byte blocks read from p_in and stores them at p_out.  xcounter is
 * advanced by 'b'.  When 'k' is non-zero the resident round keys
 * (xkey0/xkey4/xkey8/xkey12) are loaded from p_keys on the way; otherwise
 * they are assumed to be live already.  Round-key loads are interleaved
 * with the AES rounds; keep the statement order as is.
 */
.macro do_aes b, k, key_len
	.set by, \b
	.set load_keys, \k
	.set klen, \key_len

	.if (load_keys)
		vmovdqa	0*16(p_keys), xkey0
	.endif

	vpshufb	xbyteswap, xcounter, xdata0

	/*
	 * Block i uses counter + i.  The add only touches the low qword, so
	 * a result whose low qword is zero means it wrapped: propagate the
	 * carry into the high qword of this block and of xcounter itself, so
	 * that the following blocks start from the corrected high half.
	 */
	.set i, 1
	.rept (by - 1)
		club XDATA, i
		vpaddq	(ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
		vptest	ddq_low_msk(%rip), var_xdata
		jnz 1f
		vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
		1:
		vpshufb	xbyteswap, var_xdata, var_xdata
		.set i, (i +1)
	.endr

	vmovdqa	1*16(p_keys), xkeyA

	vpxor	xkey0, xdata0, xdata0
	/* advance the running counter past this batch, with the same carry rule */
	vpaddq	(ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
	vptest	ddq_low_msk(%rip), xcounter
	jnz	1f
	vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
	1:

	/* round 0 (whitening) for the remaining blocks */
	.set i, 1
	.rept (by - 1)
		club XDATA, i
		vpxor	xkey0, var_xdata, var_xdata
		.set i, (i +1)
	.endr

	vmovdqa	2*16(p_keys), xkeyB

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyA, var_xdata, var_xdata		/* key 1 */
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	3*16(p_keys), xkey4
		.endif
	.else
		vmovdqa	3*16(p_keys), xkeyA
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyB, var_xdata, var_xdata		/* key 2 */
		.set i, (i +1)
	.endr

	/* input is read further down through negative offsets from p_in */
	add	$(16*by), p_in

	.if (klen == KEY_128)
		vmovdqa	4*16(p_keys), xkeyB
	.else
		.if (load_keys)
			vmovdqa	4*16(p_keys), xkey4
		.endif
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		/* key 3 */
		.if (klen == KEY_128)
			vaesenc	xkey4, var_xdata, var_xdata
		.else
			vaesenc	xkeyA, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	vmovdqa	5*16(p_keys), xkeyA

	.set i, 0
	.rept by
		club XDATA, i
		/* key 4 */
		.if (klen == KEY_128)
			vaesenc	xkeyB, var_xdata, var_xdata
		.else
			vaesenc	xkey4, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	6*16(p_keys), xkey8
		.endif
	.else
		vmovdqa	6*16(p_keys), xkeyB
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyA, var_xdata, var_xdata		/* key 5 */
		.set i, (i +1)
	.endr

	vmovdqa	7*16(p_keys), xkeyA

	.set i, 0
	.rept by
		club XDATA, i
		/* key 6 */
		.if (klen == KEY_128)
			vaesenc	xkey8, var_xdata, var_xdata
		.else
			vaesenc	xkeyB, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		vmovdqa	8*16(p_keys), xkeyB
	.else
		.if (load_keys)
			vmovdqa	8*16(p_keys), xkey8
		.endif
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		vaesenc	xkeyA, var_xdata, var_xdata		/* key 7 */
		.set i, (i +1)
	.endr

	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	9*16(p_keys), xkey12
		.endif
	.else
		vmovdqa	9*16(p_keys), xkeyA
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		/* key 8 */
		.if (klen == KEY_128)
			vaesenc	xkeyB, var_xdata, var_xdata
		.else
			vaesenc	xkey8, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	vmovdqa	10*16(p_keys), xkeyB

	.set i, 0
	.rept by
		club XDATA, i
		/* key 9 */
		.if (klen == KEY_128)
			vaesenc	xkey12, var_xdata, var_xdata
		.else
			vaesenc	xkeyA, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen != KEY_128)
		vmovdqa	11*16(p_keys), xkeyA
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		/* key 10: final round for AES-128 */
		.if (klen == KEY_128)
			vaesenclast	xkeyB, var_xdata, var_xdata
		.else
			vaesenc	xkeyB, var_xdata, var_xdata
		.endif
		.set i, (i +1)
	.endr

	.if (klen != KEY_128)
		.if (load_keys)
			vmovdqa	12*16(p_keys), xkey12
		.endif

		.set i, 0
		.rept by
			club XDATA, i
			vaesenc	xkeyA, var_xdata, var_xdata	/* key 11 */
			.set i, (i +1)
		.endr

		.if (klen == KEY_256)
			vmovdqa	13*16(p_keys), xkeyA
		.endif

		.set i, 0
		.rept by
			club XDATA, i
			.if (klen == KEY_256)
				/* key 12 */
				vaesenc	xkey12, var_xdata, var_xdata
			.else
				/* key 12: final round for AES-192 */
				vaesenclast xkey12, var_xdata, var_xdata
			.endif
			.set i, (i +1)
		.endr

		.if (klen == KEY_256)
			vmovdqa	14*16(p_keys), xkeyB

			.set i, 0
			.rept by
				club XDATA, i
				/* key 13 */
				vaesenc	xkeyA, var_xdata, var_xdata
				.set i, (i +1)
			.endr

			.set i, 0
			.rept by
				club XDATA, i
				/* key 14: final round for AES-256 */
				vaesenclast	xkeyB, var_xdata, var_xdata
				.set i, (i +1)
			.endr
		.endif
	.endif

	/*
	 * XOR the keystream with the input, two blocks at a time.  xkeyA and
	 * xkeyB are free again here and are reused as data scratch.
	 */
	.set i, 0
	.rept (by / 2)
		.set j, (i+1)
		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
		VMOVDQ	(j*16 - 16*by)(p_in), xkeyB
		club XDATA, i
		vpxor	xkeyA, var_xdata, var_xdata
		club XDATA, j
		vpxor	xkeyB, var_xdata, var_xdata
		.set i, (i+2)
	.endr

	/* odd block count: one block is left over */
	.if (i < by)
		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
		club XDATA, i
		vpxor	xkeyA, var_xdata, var_xdata
	.endif

	.set i, 0
	.rept by
		club XDATA, i
		VMOVDQ	var_xdata, i*16(p_out)
		.set i, (i+1)
	.endr
.endm

/* do_aes on 'val' blocks, loading the resident round keys as it goes */
.macro do_aes_load val, key_len
	do_aes \val, 1, \key_len
.endm

/* do_aes on 'val' blocks with the resident round keys already loaded */
.macro do_aes_noload val, key_len
	do_aes \val, 0, \key_len
.endm

/*
 * main body of aes ctr load
 *
 * Processes (num_bytes / 16) mod 8 blocks first, through a path that also
 * loads the resident round keys, then the rest in batches of eight.  Any
 * trailing partial block (num_bytes mod 16) is ignored.  The updated
 * counter is written back to *p_iv, except when fewer than 16 bytes were
 * requested, in which case nothing is read or written.
 *
 * NOTE(review): the prototypes below document num_bytes as unsigned int,
 * but all 64 bits of %r8 are used here - confirm callers pass a value
 * whose upper half is zero.
 */
.macro do_aes_ctrmain key_len
	cmp	$16, num_bytes
	/*
	 * Nothing to do.  xbyteswap/xcounter are not loaded yet, so the IV
	 * write-back must be skipped or *p_iv would be overwritten with
	 * whatever those registers happen to hold.
	 */
	jb	.Ldo_return_noiv\key_len

	vmovdqa	byteswap_const(%rip), xbyteswap
	vmovdqu	(p_iv), xcounter
	vpshufb	xbyteswap, xcounter, xcounter

	mov	num_bytes, tmp
	and	$(7*16), tmp
	jz	.Lmult_of_8_blks\key_len

	/* 1 <= tmp <= 7 */
	cmp	$(4*16), tmp
	jg	.Lgt4\key_len
	je	.Leq4\key_len

.Llt4\key_len:
	cmp	$(2*16), tmp
	jg	.Leq3\key_len
	je	.Leq2\key_len

.Leq1\key_len:
	do_aes_load	1, \key_len
	add	$(1*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq2\key_len:
	do_aes_load	2, \key_len
	add	$(2*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len


.Leq3\key_len:
	do_aes_load	3, \key_len
	add	$(3*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq4\key_len:
	do_aes_load	4, \key_len
	add	$(4*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Lgt4\key_len:
	cmp	$(6*16), tmp
	jg	.Leq7\key_len
	je	.Leq6\key_len

.Leq5\key_len:
	do_aes_load	5, \key_len
	add	$(5*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq6\key_len:
	do_aes_load	6, \key_len
	add	$(6*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Leq7\key_len:
	do_aes_load	7, \key_len
	add	$(7*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\key_len
	jmp	.Lmain_loop2\key_len

.Lmult_of_8_blks\key_len:
	/*
	 * Drop a trailing partial block here too, exactly as the paths above
	 * do; otherwise the "sub/jne" loop below would never reach zero.
	 * num_bytes >= 16 with bits 4..6 clear, so the result is still > 0.
	 */
	and	$(~7*16), num_bytes
	.if (\key_len != KEY_128)
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	4*16(p_keys), xkey4
		vmovdqa	8*16(p_keys), xkey8
		vmovdqa	12*16(p_keys), xkey12
	.else
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	3*16(p_keys), xkey4
		vmovdqa	6*16(p_keys), xkey8
		vmovdqa	9*16(p_keys), xkey12
	.endif
.align 16
.Lmain_loop2\key_len:
	/* num_bytes is a multiple of 8 blocks (8*16 bytes) and >0 */
	do_aes_noload	8, \key_len
	add	$(8*16), p_out
	sub	$(8*16), num_bytes
	jne	.Lmain_loop2\key_len

.Ldo_return2\key_len:
	/* return updated IV */
	vpshufb	xbyteswap, xcounter, xcounter
	vmovdqu	xcounter, (p_iv)
.Ldo_return_noiv\key_len:
	RET
.endm

/*
 * routine to do AES128 CTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 */
SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_128

SYM_FUNC_END(aes_ctr_enc_128_avx_by8)

/*
 * routine to do AES192 CTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 */
SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_192

SYM_FUNC_END(aes_ctr_enc_192_avx_by8)

/*
 * routine to do AES256 CTR enc/decrypt "by8"
 * XMM registers are clobbered.
 * Saving/restoring must be done at a higher level
 * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 */
SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
	/* call the aes main loop */
	do_aes_ctrmain KEY_256

SYM_FUNC_END(aes_ctr_enc_256_avx_by8)