18c2ecf20Sopenharmony_ci######################################################################## 28c2ecf20Sopenharmony_ci# Copyright (c) 2013, Intel Corporation 38c2ecf20Sopenharmony_ci# 48c2ecf20Sopenharmony_ci# This software is available to you under a choice of one of two 58c2ecf20Sopenharmony_ci# licenses. You may choose to be licensed under the terms of the GNU 68c2ecf20Sopenharmony_ci# General Public License (GPL) Version 2, available from the file 78c2ecf20Sopenharmony_ci# COPYING in the main directory of this source tree, or the 88c2ecf20Sopenharmony_ci# OpenIB.org BSD license below: 98c2ecf20Sopenharmony_ci# 108c2ecf20Sopenharmony_ci# Redistribution and use in source and binary forms, with or without 118c2ecf20Sopenharmony_ci# modification, are permitted provided that the following conditions are 128c2ecf20Sopenharmony_ci# met: 138c2ecf20Sopenharmony_ci# 148c2ecf20Sopenharmony_ci# * Redistributions of source code must retain the above copyright 158c2ecf20Sopenharmony_ci# notice, this list of conditions and the following disclaimer. 168c2ecf20Sopenharmony_ci# 178c2ecf20Sopenharmony_ci# * Redistributions in binary form must reproduce the above copyright 188c2ecf20Sopenharmony_ci# notice, this list of conditions and the following disclaimer in the 198c2ecf20Sopenharmony_ci# documentation and/or other materials provided with the 208c2ecf20Sopenharmony_ci# distribution. 218c2ecf20Sopenharmony_ci# 228c2ecf20Sopenharmony_ci# * Neither the name of the Intel Corporation nor the names of its 238c2ecf20Sopenharmony_ci# contributors may be used to endorse or promote products derived from 248c2ecf20Sopenharmony_ci# this software without specific prior written permission. 
258c2ecf20Sopenharmony_ci# 268c2ecf20Sopenharmony_ci# 278c2ecf20Sopenharmony_ci# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY 288c2ecf20Sopenharmony_ci# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 298c2ecf20Sopenharmony_ci# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 308c2ecf20Sopenharmony_ci# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR 318c2ecf20Sopenharmony_ci# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 328c2ecf20Sopenharmony_ci# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 338c2ecf20Sopenharmony_ci# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR 348c2ecf20Sopenharmony_ci# PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 358c2ecf20Sopenharmony_ci# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 368c2ecf20Sopenharmony_ci# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 378c2ecf20Sopenharmony_ci# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 388c2ecf20Sopenharmony_ci######################################################################## 398c2ecf20Sopenharmony_ci## 408c2ecf20Sopenharmony_ci## Authors: 418c2ecf20Sopenharmony_ci## Erdinc Ozturk <erdinc.ozturk@intel.com> 428c2ecf20Sopenharmony_ci## Vinodh Gopal <vinodh.gopal@intel.com> 438c2ecf20Sopenharmony_ci## James Guilford <james.guilford@intel.com> 448c2ecf20Sopenharmony_ci## Tim Chen <tim.c.chen@linux.intel.com> 458c2ecf20Sopenharmony_ci## 468c2ecf20Sopenharmony_ci## References: 478c2ecf20Sopenharmony_ci## This code was derived and highly optimized from the code described in paper: 488c2ecf20Sopenharmony_ci## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation 498c2ecf20Sopenharmony_ci## on Intel Architecture Processors. August, 2010 508c2ecf20Sopenharmony_ci## The details of the implementation is explained in: 518c2ecf20Sopenharmony_ci## Erdinc Ozturk et. al. 
Enabling High-Performance Galois-Counter-Mode 528c2ecf20Sopenharmony_ci## on Intel Architecture Processors. October, 2012. 538c2ecf20Sopenharmony_ci## 548c2ecf20Sopenharmony_ci## Assumptions: 558c2ecf20Sopenharmony_ci## 568c2ecf20Sopenharmony_ci## 578c2ecf20Sopenharmony_ci## 588c2ecf20Sopenharmony_ci## iv: 598c2ecf20Sopenharmony_ci## 0 1 2 3 608c2ecf20Sopenharmony_ci## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 618c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 628c2ecf20Sopenharmony_ci## | Salt (From the SA) | 638c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 648c2ecf20Sopenharmony_ci## | Initialization Vector | 658c2ecf20Sopenharmony_ci## | (This is the sequence number from IPSec header) | 668c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 678c2ecf20Sopenharmony_ci## | 0x1 | 688c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 698c2ecf20Sopenharmony_ci## 708c2ecf20Sopenharmony_ci## 718c2ecf20Sopenharmony_ci## 728c2ecf20Sopenharmony_ci## AAD: 738c2ecf20Sopenharmony_ci## AAD padded to 128 bits with 0 748c2ecf20Sopenharmony_ci## for example, assume AAD is a u32 vector 758c2ecf20Sopenharmony_ci## 768c2ecf20Sopenharmony_ci## if AAD is 8 bytes: 778c2ecf20Sopenharmony_ci## AAD[3] = {A0, A1}# 788c2ecf20Sopenharmony_ci## padded AAD in xmm register = {A1 A0 0 0} 798c2ecf20Sopenharmony_ci## 808c2ecf20Sopenharmony_ci## 0 1 2 3 818c2ecf20Sopenharmony_ci## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 828c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 838c2ecf20Sopenharmony_ci## | SPI (A1) | 848c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 858c2ecf20Sopenharmony_ci## | 32-bit Sequence Number (A0) | 868c2ecf20Sopenharmony_ci## 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 878c2ecf20Sopenharmony_ci## | 0x0 | 888c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 898c2ecf20Sopenharmony_ci## 908c2ecf20Sopenharmony_ci## AAD Format with 32-bit Sequence Number 918c2ecf20Sopenharmony_ci## 928c2ecf20Sopenharmony_ci## if AAD is 12 bytes: 938c2ecf20Sopenharmony_ci## AAD[3] = {A0, A1, A2}# 948c2ecf20Sopenharmony_ci## padded AAD in xmm register = {A2 A1 A0 0} 958c2ecf20Sopenharmony_ci## 968c2ecf20Sopenharmony_ci## 0 1 2 3 978c2ecf20Sopenharmony_ci## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 988c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 998c2ecf20Sopenharmony_ci## | SPI (A2) | 1008c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 1018c2ecf20Sopenharmony_ci## | 64-bit Extended Sequence Number {A1,A0} | 1028c2ecf20Sopenharmony_ci## | | 1038c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 1048c2ecf20Sopenharmony_ci## | 0x0 | 1058c2ecf20Sopenharmony_ci## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 1068c2ecf20Sopenharmony_ci## 1078c2ecf20Sopenharmony_ci## AAD Format with 64-bit Extended Sequence Number 1088c2ecf20Sopenharmony_ci## 1098c2ecf20Sopenharmony_ci## 1108c2ecf20Sopenharmony_ci## aadLen: 1118c2ecf20Sopenharmony_ci## from the definition of the spec, aadLen can only be 8 or 12 bytes. 1128c2ecf20Sopenharmony_ci## The code additionally supports aadLen of length 16 bytes. 1138c2ecf20Sopenharmony_ci## 1148c2ecf20Sopenharmony_ci## TLen: 1158c2ecf20Sopenharmony_ci## from the definition of the spec, TLen can only be 8, 12 or 16 bytes. 1168c2ecf20Sopenharmony_ci## 1178c2ecf20Sopenharmony_ci## poly = x^128 + x^127 + x^126 + x^121 + 1 1188c2ecf20Sopenharmony_ci## throughout the code, one tab and two tab indentations are used. 
## One tab is for the GHASH part, two tabs are for the AES part.
##

#include <linux/linkage.h>

# Read-only 16-byte constants. Each of the first six lives in its own
# mergeable section, so the linker may reorder and de-duplicate them.

.section        .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY:           .octa   0xC2000000000000000000000000000001

.section        .rodata.cst16.POLY2, "aM", @progbits, 16
.align 16
POLY2:          .octa   0xC20000000000000000000001C2000000

.section        .rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE:         .octa   0x00000001000000000000000000000001

# Byte-reversal control for vpshufb (index 15 down to 0).
.section        .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:      .octa   0x000102030405060708090A0B0C0D0E0F

# Increment for the counter block (added with vpaddd in GCM_ENC_DEC).
.section        .rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE:            .octa   0x00000000000000000000000000000001

.section        .rodata.cst16.ONEf, "aM", @progbits, 16
.align 16
ONEf:           .octa   0x01000000000000000000000000000000

# Layout-sensitive group: keep SHIFT_MASK, ALL_F and the all-zero block
# adjacent and in exactly this order, in a plain (non-mergeable) section.
# GCM_ENC_DEC forms the pointer SHIFT_MASK+16-n (n = tail length in bytes)
# and loads 16 bytes both from there and from the same offset relative to
# ALL_F, so each load straddles two neighbouring entries:
#   - SHIFT_MASK+16-n  gives a vpshufb control that shifts right by 16-n bytes
#   - ALL_F+16-n       gives a mask with the low n bytes set and the rest zero
.section        .rodata, "a", @progbits
.align 16
SHIFT_MASK:     .octa   0x0f0e0d0c0b0a09080706050403020100
ALL_F:          .octa   0xffffffffffffffffffffffffffffffff
                .octa   0x00000000000000000000000000000000

# Table of seventeen 16-byte entries (17 * 16 = 272 bytes, matching .size).
# NOTE(review): these look like vpshufb controls (0xff selects zero) for
# right-aligning a short AAD tail; no user is visible in this part of the
# file - confirm before relying on that.
.section        .rodata
.align 16
.type           aad_shift_arr, @object
.size           aad_shift_arr, 272
aad_shift_arr:
        .octa   0xffffffffffffffffffffffffffffffff
        .octa   0xffffffffffffffffffffffffffffff0C
        .octa   0xffffffffffffffffffffffffffff0D0C
        .octa   0xffffffffffffffffffffffffff0E0D0C
        .octa   0xffffffffffffffffffffffff0F0E0D0C
        .octa   0xffffffffffffffffffffff0C0B0A0908
        .octa   0xffffffffffffffffffff0D0C0B0A0908
        .octa   0xffffffffffffffffff0E0D0C0B0A0908
        .octa   0xffffffffffffffff0F0E0D0C0B0A0908
        .octa   0xffffffffffffff0C0B0A090807060504
        .octa   0xffffffffffff0D0C0B0A090807060504
        .octa   0xffffffffff0E0D0C0B0A090807060504
        .octa   0xffffffff0F0E0D0C0B0A090807060504
        .octa   0xffffff0C0B0A09080706050403020100
        .octa   0xffff0D0C0B0A09080706050403020100
        .octa   0xff0E0D0C0B0A09080706050403020100
        .octa   0x0F0E0D0C0B0A09080706050403020100


.text


/*
 * Byte offsets into the GCM context block addressed through arg2.
 * Comments are kept off the #define lines on purpose: anything after the
 * value would become part of the macro expansion.
 *
 *   AadHash       running GHASH value
 *   AadLen        AAD length in bytes (64-bit)
 *   InLen         number of message bytes processed so far (64-bit)
 *   PBlockEncKey  E(K, Yn) saved for a trailing partial block
 *   OrigIV        initial counter block Y0
 *   CurCount      current counter block
 *   PBlockLen     byte length of the pending partial block
 */
#define AadHash 16*0
#define AadLen 16*1
#define InLen (16*1)+8
#define PBlockEncKey 16*2
#define OrigIV 16*3
#define CurCount 16*4
#define PBlockLen 16*5

# Precomputed hash key powers, stored in the same context block.
HashKey        = 16*6   # store HashKey <<1 mod poly here
HashKey_2      = 16*7   # store HashKey^2 <<1 mod poly here
HashKey_3      = 16*8   # store HashKey^3 <<1 mod poly here
HashKey_4      = 16*9   # store HashKey^4 <<1 mod poly here
HashKey_5      = 16*10  # store HashKey^5 <<1 mod poly here
HashKey_6      = 16*11  # store HashKey^6 <<1 mod poly here
HashKey_7      = 16*12  # store HashKey^7 <<1 mod poly here
HashKey_8      = 16*13  # store HashKey^8 <<1 mod poly here

# XOR of the high and low halves of each power (for Karatsuba purposes).
HashKey_k      = 16*14  # store XOR of HashKey <<1 mod poly here
HashKey_2_k    = 16*15  # store XOR of HashKey^2 <<1 mod poly here
HashKey_3_k    = 16*16  # store XOR of HashKey^3 <<1 mod poly here
HashKey_4_k    = 16*17  # store XOR of HashKey^4 <<1 mod poly here
HashKey_5_k    = 16*18  # store XOR of HashKey^5 <<1 mod poly here
HashKey_6_k    = 16*19  # store XOR of HashKey^6 <<1 mod poly here
HashKey_7_k    = 16*20  # store XOR of HashKey^7 <<1 mod poly here
HashKey_8_k    = 16*21  # store XOR of HashKey^8 <<1 mod poly here

/*
 * Argument aliases. arg1-arg6 are the first six integer argument
 * registers; arg7-arg10 are read from the caller stack relative to %r14,
 * which FUNC_SAVE sets to %rsp right after its pushes (STACK_OFFSET bytes
 * of saved registers, then the return address, then the stack arguments).
 *
 * keysize is the memory operand 2*15*16 bytes past arg1.
 * NOTE(review): presumably the key-length field that follows the two
 * round-key arrays of the AES context - confirm against the C structure.
 */
#define arg1 %rdi
#define arg2 %rsi
#define arg3 %rdx
#define arg4 %rcx
#define arg5 %r8
#define arg6 %r9
#define arg7 STACK_OFFSET+8*1(%r14)
#define arg8 STACK_OFFSET+8*2(%r14)
#define arg9 STACK_OFFSET+8*3(%r14)
#define arg10 STACK_OFFSET+8*4(%r14)
#define keysize 2*15*16(arg1)

# Assembly-time counters consumed by setreg and by the .rep loops.
i = 0
j = 0

# Selector values passed to the block macros.
out_order = 0
in_order = 1
DEC = 0
ENC = 1

# Helper for setreg: binds the symbol reg_<r> to register %xmm<n>.
.macro define_reg r n
reg_\r = %xmm\n
.endm

# Rebinds reg_i and reg_j to the xmm registers numbered by the current
# values of i and j (the numeric expansion of i and j needs altmacro mode).
.macro setreg
.altmacro
define_reg i %i
define_reg j %j
.noaltmacro
.endm

# FUNC_SAVE pushes four 8-byte registers before copying %rsp to %r14, so the
# return address sits STACK_OFFSET bytes above %r14.
STACK_OFFSET = 8*4

# Slots of the scratch area reserved by FUNC_SAVE.
# NOTE(review): presumably addressed relative to %rsp - confirm at the users.
TMP1 = 16*0     # Temporary storage for AAD
TMP2 = 16*1     # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
TMP3 = 16*2     # Temporary storage for AES State 3
TMP4 = 16*3     # Temporary storage for AES State 4
TMP5 = 16*4     # Temporary storage for AES State 5
TMP6 = 16*5     # Temporary storage for AES State 6
TMP7 = 16*6     # Temporary storage for AES State 7
TMP8 = 16*7     # Temporary storage for AES State 8

# Size in bytes of the scratch area FUNC_SAVE reserves below the saved
# registers (eight 16-byte slots, TMP1..TMP8).
VARIABLE_OFFSET = 16*8

################################
# Utility Macros
################################

# FUNC_SAVE: function prologue.
# Saves r12-r15, records the post-push stack pointer in r14 (the base used
# by arg7..arg10 and by FUNC_RESTORE), reserves VARIABLE_OFFSET bytes of
# scratch space and aligns rsp down to 64 bytes.
.macro FUNC_SAVE
        # number of pushes times 8 must equal STACK_OFFSET
        push    %r12
        push    %r13
        push    %r14
        push    %r15

        mov     %rsp, %r14

        sub     $VARIABLE_OFFSET, %rsp
        and     $~63, %rsp                    # align rsp to 64 bytes
.endm

# FUNC_RESTORE: function epilogue, the exact inverse of FUNC_SAVE.
# Relies on r14 still holding the value FUNC_SAVE stored in it.
.macro FUNC_RESTORE
        mov     %r14, %rsp

        pop     %r15
        pop     %r14
        pop     %r13
        pop     %r12
.endm

# ENCRYPT_SINGLE_BLOCK: AES-encrypt one block in place.
#   REP   number of vaesenc rounds (a final vaesenclast round follows)
#   XMM0  register holding the block on entry and the result on exit
# Round keys are read from 16-byte slots starting at (arg1).
# Changes the assembly-time symbol i and rebinds reg_i/reg_j via setreg.
.macro ENCRYPT_SINGLE_BLOCK REP XMM0
                vpxor    (arg1), \XMM0, \XMM0
               i = 1
               setreg
.rep \REP
                vaesenc  16*i(arg1), \XMM0, \XMM0
               i = (i+1)
               setreg
.endr
                vaesenclast 16*i(arg1), \XMM0, \XMM0
.endm

# GCM_ENC_DEC: combined body for the GCM encrypt and decrypt functions.
#
# Macro parameters:
#   INITIAL_BLOCKS, GHASH_8_ENCRYPT_8_PARALLEL, GHASH_LAST_8, GHASH_MUL
#            names of the implementation macros to expand
#   ENC_DEC  ENC or DEC
#   REP      round count handed to ENCRYPT_SINGLE_BLOCK and the block macros
#
# Register arguments as used below:
#   arg1  AES round keys
#   arg2  GCM context (AadHash, InLen, CurCount, PBlockLen, ...)
#   arg3  output buffer
#   arg4  input buffer
#   arg5  number of bytes to process
#
# r11 is the running byte offset into the input and output buffers.
# clobbering all xmm registers
# clobbering rax, r10, r11, r12, r13, r15 (r14 is only used as the frame
# base that FUNC_SAVE set up)
.macro  GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP
        vmovdqu AadHash(arg2), %xmm8
        vmovdqu HashKey(arg2), %xmm13       # xmm13 = HashKey
        add     arg5, InLen(arg2)           # account for this call in the running total

        # initialize the data pointer offset as zero
        xor     %r11d, %r11d

        # finish any partial block left over from a previous call; r11 comes
        # back as the number of bytes that consumed
        PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC
        sub     %r11, arg5

        mov     arg5, %r13                  # save the number of bytes of plaintext/ciphertext
        and     $-16, %r13                  # r13 = r13 - (r13 mod 16)

        # r12 = (number of whole blocks) mod 8: these are handled first so
        # that the main loop can work on groups of eight
        mov     %r13, %r12
        shr     $4, %r12
        and     $7, %r12
        jz      _initial_num_blocks_is_0\@

        cmp     $7, %r12
        je      _initial_num_blocks_is_7\@
        cmp     $6, %r12
        je      _initial_num_blocks_is_6\@
        cmp     $5, %r12
        je      _initial_num_blocks_is_5\@
        cmp     $4, %r12
        je      _initial_num_blocks_is_4\@
        cmp     $3, %r12
        je      _initial_num_blocks_is_3\@
        cmp     $2, %r12
        je      _initial_num_blocks_is_2\@

        jmp     _initial_num_blocks_is_1\@

_initial_num_blocks_is_7\@:
        \INITIAL_BLOCKS  \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*7, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_6\@:
        \INITIAL_BLOCKS  \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*6, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_5\@:
        \INITIAL_BLOCKS  \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*5, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_4\@:
        \INITIAL_BLOCKS  \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*4, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_3\@:
        \INITIAL_BLOCKS  \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*3, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_2\@:
        \INITIAL_BLOCKS  \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*2, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_1\@:
        \INITIAL_BLOCKS  \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*1, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_0\@:
        \INITIAL_BLOCKS  \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC


_initial_blocks_encrypted\@:
        # r13 = whole-block bytes still to process, a multiple of 128 here
        test    %r13, %r13
        je      _zero_cipher_left\@

        sub     $128, %r13
        je      _eight_cipher_left\@

        # r15d tracks the low byte of the counter block so the loop can tell
        # when adding 8 would carry out of that byte
        vmovd   %xmm9, %r15d
        and     $255, %r15d
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9


_encrypt_by_8_new\@:
        cmp     $(255-8), %r15d
        jg      _encrypt_by_8\@

        # no carry out of the low counter byte: out_order variant
        add     $8, %r15b
        \GHASH_8_ENCRYPT_8_PARALLEL      \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
        add     $128, %r11
        sub     $128, %r13
        jne     _encrypt_by_8_new\@

        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        jmp     _eight_cipher_left\@

_encrypt_by_8\@:
        # the low counter byte would wrap: byte-swap the counter around the
        # in_order variant
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        add     $8, %r15b
        \GHASH_8_ENCRYPT_8_PARALLEL      \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        add     $128, %r11
        sub     $128, %r13
        jne     _encrypt_by_8_new\@

        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9


_eight_cipher_left\@:
        # fold the last eight blocks into the hash
        \GHASH_LAST_8    %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8


_zero_cipher_left\@:
        vmovdqu %xmm14, AadHash(arg2)
        vmovdqu %xmm9, CurCount(arg2)

        # check for 0 length
        mov     arg5, %r13
        and     $15, %r13                            # r13 = (arg5 mod 16)

        je      _multiple_of_16_bytes\@

        # handle the last <16 Byte block separately

        mov     %r13, PBlockLen(arg2)

        vpaddd  ONE(%rip), %xmm9, %xmm9              # INCR CNT to get Yn
        vmovdqu %xmm9, CurCount(arg2)
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9

        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9          # E(K, Yn)
        vmovdqu %xmm9, PBlockEncKey(arg2)

        # With at least 16 bytes in this call the tail can be fetched with
        # one 16-byte load that ends at the last input byte. The length is
        # unsigned, so use an unsigned comparison.
        cmp     $16, arg5
        jae     _large_enough_update\@

        # short input: read exactly r13 bytes, no over-read
        lea     (arg4,%r11,1), %r10
        mov     %r13, %r12

        READ_PARTIAL_BLOCK %r10 %r12 %xmm1

        lea     SHIFT_MASK+16(%rip), %r12
        sub     %r13, %r12                           # adjust the shuffle mask pointer to be
                                                     # able to shift 16-r13 bytes (r13 is the
                                                     # number of bytes in plaintext mod 16)

        jmp     _final_ghash_mul\@

_large_enough_update\@:
        # step r11 back so the 16-byte load ends at the last input byte
        sub     $16, %r11
        add     %r13, %r11

        # receive the last <16 Byte block
        vmovdqu (arg4, %r11, 1), %xmm1

        # restore r11 to the offset of the tail block
        sub     %r13, %r11
        add     $16, %r11

        lea     SHIFT_MASK+16(%rip), %r12
        # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
        # (r13 is the number of bytes in plaintext mod 16)
        sub     %r13, %r12
        # get the appropriate shuffle mask
        vmovdqu (%r12), %xmm2
        # shift right 16-r13 bytes
        vpshufb %xmm2, %xmm1, %xmm1

_final_ghash_mul\@:
        # The tail is XORed into the hash here; the multiplication for it is
        # done later (GCM_COMPLETE runs GHASH_MUL when PBlockLen is non-zero).
        .if  \ENC_DEC ==  DEC
        vmovdqa %xmm1, %xmm2                         # keep the ciphertext for hashing
        vpxor   %xmm1, %xmm9, %xmm9                  # Ciphertext XOR E(K, Yn)
        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1        # get the appropriate mask to
                                                     # mask out top 16-r13 bytes of xmm9
        vpand   %xmm1, %xmm9, %xmm9                  # mask out top 16-r13 bytes of xmm9
        vpand   %xmm1, %xmm2, %xmm2
        vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
        vpxor   %xmm2, %xmm14, %xmm14

        vmovdqu %xmm14, AadHash(arg2)
        .else
        vpxor   %xmm1, %xmm9, %xmm9                  # Plaintext XOR E(K, Yn)
        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1        # get the appropriate mask to
                                                     # mask out top 16-r13 bytes of xmm9
        vpand   %xmm1, %xmm9, %xmm9                  # mask out top 16-r13 bytes of xmm9
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        vpxor   %xmm9, %xmm14, %xmm14

        vmovdqu %xmm14, AadHash(arg2)
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9        # shuffle xmm9 back to output as ciphertext
        .endif


        #############################
        # output r13 Bytes
        vmovq   %xmm9, %rax
        cmp     $8, %r13
        jle     _less_than_8_bytes_left\@

        mov     %rax, (arg3 , %r11)
        add     $8, %r11
        vpsrldq $8, %xmm9, %xmm9
        vmovq   %xmm9, %rax
        sub     $8, %r13

_less_than_8_bytes_left\@:
        # store the remaining 1..8 bytes one at a time, low byte first
        movb    %al, (arg3 , %r11)
        add     $1, %r11
        shr     $8, %rax
        sub     $1, %r13
        jne     _less_than_8_bytes_left\@
        #############################

_multiple_of_16_bytes\@:
.endm


# GCM_COMPLETE Finishes update of tag of last partial block
#   GHASH_MUL     name of the GHASH multiply macro to expand
#   REP           round count handed to ENCRYPT_SINGLE_BLOCK
#   AUTH_TAG      operand holding the address the tag is written to
#   AUTH_TAG_LEN  operand holding the tag length in bytes
# Reads AadHash, HashKey, PBlockLen, AadLen, InLen and OrigIV from the
# context at arg2 and the round keys from arg1.
# Output: Authorization Tag (AUTH_TAG)
# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN
        vmovdqu AadHash(arg2), %xmm14
        vmovdqu HashKey(arg2), %xmm13

        mov     PBlockLen(arg2), %r12
        test    %r12, %r12
        je      _partial_done\@

        #GHASH computation for the last <16 Byte block
        \GHASH_MUL       %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6

_partial_done\@:
        mov     AadLen(arg2), %r12                   # r12 = aadLen (number of bytes)
        shl     $3, %r12                             # convert into number of bits
        # len(A) is a 64-bit field of the final GHASH block. Move all 64
        # bits: a 32-bit move drops the high bits of the bit count once
        # aadLen reaches 2^29 bytes, which would produce a wrong tag.
        vmovq   %r12, %xmm15                         # len(A) in xmm15

        mov     InLen(arg2), %r12
        shl     $3, %r12                             # len(C) in bits (bytes * 8)
        vmovq   %r12, %xmm1
        vpslldq $8, %xmm15, %xmm15                   # xmm15 = len(A)|| 0x0000000000000000
        vpxor   %xmm1, %xmm15, %xmm15                # xmm15 = len(A)||len(C)

        vpxor   %xmm15, %xmm14, %xmm14
        # final GHASH computation
        \GHASH_MUL       %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
        vpshufb SHUF_MASK(%rip), %xmm14, %xmm14      # perform a 16Byte swap

        vmovdqu OrigIV(arg2), %xmm9

        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9          # E(K, Y0)

        vpxor   %xmm14, %xmm9, %xmm9                 # tag = E(K, Y0) XOR GHASH



_return_T\@:
        mov     \AUTH_TAG, %r10                      # r10 = authTag
        mov     \AUTH_TAG_LEN, %r11                  # r11 = auth_tag_len

        cmp     $16, %r11
        je      _T_16\@

        cmp     $8, %r11
        jl      _T_4\@

        # Shorter tags are written in pieces of 8, 4, 2 and 1 bytes, shifting
        # the bytes already written out of xmm9 and counting r11 down.
_T_8\@:
        vmovq   %xmm9, %rax
        mov     %rax, (%r10)
        add     $8, %r10
        sub     $8, %r11
        vpsrldq $8, %xmm9, %xmm9
        test    %r11, %r11
        je      _return_T_done\@
_T_4\@:
        vmovd   %xmm9, %eax
        mov     %eax, (%r10)
        add     $4, %r10
        sub     $4, %r11
        vpsrldq $4, %xmm9, %xmm9
        test    %r11, %r11
        je      _return_T_done\@
_T_123\@:
        vmovd   %xmm9, %eax
        cmp     $2, %r11
        jl      _T_1\@
        mov     %ax, (%r10)
        cmp     $2, %r11
        je      _return_T_done\@
        add     $2, %r10
        sar     $16, %eax                            # only al is stored after this
_T_1\@:
        mov     %al, (%r10)
        jmp     _return_T_done\@

_T_16\@:
        vmovdqu %xmm9, (%r10)

_return_T_done\@:
.endm

6058c2ecf20Sopenharmony_ci.macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci mov \AAD, %r10 # r10 = AAD 6088c2ecf20Sopenharmony_ci mov \AADLEN, %r12 # r12 = aadLen 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci mov %r12, %r11 6128c2ecf20Sopenharmony_ci 6138c2ecf20Sopenharmony_ci vpxor \T8, \T8, \T8 6148c2ecf20Sopenharmony_ci vpxor \T7, \T7, \T7 6158c2ecf20Sopenharmony_ci cmp $16, %r11 6168c2ecf20Sopenharmony_ci jl _get_AAD_rest8\@ 6178c2ecf20Sopenharmony_ci_get_AAD_blocks\@: 6188c2ecf20Sopenharmony_ci vmovdqu (%r10), \T7 6198c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \T7, \T7 6208c2ecf20Sopenharmony_ci vpxor \T7, \T8, \T8 6218c2ecf20Sopenharmony_ci \GHASH_MUL \T8, \T2, \T1, \T3, \T4, \T5, \T6 6228c2ecf20Sopenharmony_ci add $16, %r10 6238c2ecf20Sopenharmony_ci sub $16, %r12 6248c2ecf20Sopenharmony_ci sub $16, %r11 6258c2ecf20Sopenharmony_ci cmp $16, %r11 6268c2ecf20Sopenharmony_ci jge _get_AAD_blocks\@ 6278c2ecf20Sopenharmony_ci vmovdqu \T8, \T7 6288c2ecf20Sopenharmony_ci test %r11, %r11 6298c2ecf20Sopenharmony_ci je _get_AAD_done\@ 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci vpxor \T7, \T7, \T7 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci /* read the last <16B of AAD. 
since we have at least 4B of 6348c2ecf20Sopenharmony_ci data right after the AAD (the ICV, and maybe some CT), we can 6358c2ecf20Sopenharmony_ci read 4B/8B blocks safely, and then get rid of the extra stuff */ 6368c2ecf20Sopenharmony_ci_get_AAD_rest8\@: 6378c2ecf20Sopenharmony_ci cmp $4, %r11 6388c2ecf20Sopenharmony_ci jle _get_AAD_rest4\@ 6398c2ecf20Sopenharmony_ci movq (%r10), \T1 6408c2ecf20Sopenharmony_ci add $8, %r10 6418c2ecf20Sopenharmony_ci sub $8, %r11 6428c2ecf20Sopenharmony_ci vpslldq $8, \T1, \T1 6438c2ecf20Sopenharmony_ci vpsrldq $8, \T7, \T7 6448c2ecf20Sopenharmony_ci vpxor \T1, \T7, \T7 6458c2ecf20Sopenharmony_ci jmp _get_AAD_rest8\@ 6468c2ecf20Sopenharmony_ci_get_AAD_rest4\@: 6478c2ecf20Sopenharmony_ci test %r11, %r11 6488c2ecf20Sopenharmony_ci jle _get_AAD_rest0\@ 6498c2ecf20Sopenharmony_ci mov (%r10), %eax 6508c2ecf20Sopenharmony_ci movq %rax, \T1 6518c2ecf20Sopenharmony_ci add $4, %r10 6528c2ecf20Sopenharmony_ci sub $4, %r11 6538c2ecf20Sopenharmony_ci vpslldq $12, \T1, \T1 6548c2ecf20Sopenharmony_ci vpsrldq $4, \T7, \T7 6558c2ecf20Sopenharmony_ci vpxor \T1, \T7, \T7 6568c2ecf20Sopenharmony_ci_get_AAD_rest0\@: 6578c2ecf20Sopenharmony_ci /* finalize: shift out the extra bytes we read, and align 6588c2ecf20Sopenharmony_ci left. 
since pslldq can only shift by an immediate, we use 6598c2ecf20Sopenharmony_ci vpshufb and an array of shuffle masks */ 6608c2ecf20Sopenharmony_ci movq %r12, %r11 6618c2ecf20Sopenharmony_ci salq $4, %r11 6628c2ecf20Sopenharmony_ci vmovdqu aad_shift_arr(%r11), \T1 6638c2ecf20Sopenharmony_ci vpshufb \T1, \T7, \T7 6648c2ecf20Sopenharmony_ci_get_AAD_rest_final\@: 6658c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \T7, \T7 6668c2ecf20Sopenharmony_ci vpxor \T8, \T7, \T7 6678c2ecf20Sopenharmony_ci \GHASH_MUL \T7, \T2, \T1, \T3, \T4, \T5, \T6 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci_get_AAD_done\@: 6708c2ecf20Sopenharmony_ci vmovdqu \T7, AadHash(arg2) 6718c2ecf20Sopenharmony_ci.endm 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci.macro INIT GHASH_MUL PRECOMPUTE 6748c2ecf20Sopenharmony_ci mov arg6, %r11 6758c2ecf20Sopenharmony_ci mov %r11, AadLen(arg2) # ctx_data.aad_length = aad_length 6768c2ecf20Sopenharmony_ci xor %r11d, %r11d 6778c2ecf20Sopenharmony_ci mov %r11, InLen(arg2) # ctx_data.in_length = 0 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0 6808c2ecf20Sopenharmony_ci mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0 6818c2ecf20Sopenharmony_ci mov arg3, %rax 6828c2ecf20Sopenharmony_ci movdqu (%rax), %xmm0 6838c2ecf20Sopenharmony_ci movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm0, %xmm0 6868c2ecf20Sopenharmony_ci movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci vmovdqu (arg4), %xmm6 # xmm6 = HashKey 6898c2ecf20Sopenharmony_ci 6908c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 6918c2ecf20Sopenharmony_ci ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey 6928c2ecf20Sopenharmony_ci vmovdqa %xmm6, %xmm2 6938c2ecf20Sopenharmony_ci vpsllq $1, %xmm6, %xmm6 6948c2ecf20Sopenharmony_ci 
vpsrlq $63, %xmm2, %xmm2 6958c2ecf20Sopenharmony_ci vmovdqa %xmm2, %xmm1 6968c2ecf20Sopenharmony_ci vpslldq $8, %xmm2, %xmm2 6978c2ecf20Sopenharmony_ci vpsrldq $8, %xmm1, %xmm1 6988c2ecf20Sopenharmony_ci vpor %xmm2, %xmm6, %xmm6 6998c2ecf20Sopenharmony_ci #reduction 7008c2ecf20Sopenharmony_ci vpshufd $0b00100100, %xmm1, %xmm2 7018c2ecf20Sopenharmony_ci vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 7028c2ecf20Sopenharmony_ci vpand POLY(%rip), %xmm2, %xmm2 7038c2ecf20Sopenharmony_ci vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly 7048c2ecf20Sopenharmony_ci ####################################################################### 7058c2ecf20Sopenharmony_ci vmovdqu %xmm6, HashKey(arg2) # store HashKey<<1 mod poly 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_ci CALC_AAD_HASH \GHASH_MUL, arg5, arg6, %xmm2, %xmm6, %xmm3, %xmm4, %xmm5, %xmm7, %xmm1, %xmm0 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci \PRECOMPUTE %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 7108c2ecf20Sopenharmony_ci.endm 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci# Reads DLEN bytes starting at DPTR and stores in XMMDst 7148c2ecf20Sopenharmony_ci# where 0 < DLEN < 16 7158c2ecf20Sopenharmony_ci# Clobbers %rax, DLEN 7168c2ecf20Sopenharmony_ci.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst 7178c2ecf20Sopenharmony_ci vpxor \XMMDst, \XMMDst, \XMMDst 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_ci cmp $8, \DLEN 7208c2ecf20Sopenharmony_ci jl _read_lt8_\@ 7218c2ecf20Sopenharmony_ci mov (\DPTR), %rax 7228c2ecf20Sopenharmony_ci vpinsrq $0, %rax, \XMMDst, \XMMDst 7238c2ecf20Sopenharmony_ci sub $8, \DLEN 7248c2ecf20Sopenharmony_ci jz _done_read_partial_block_\@ 7258c2ecf20Sopenharmony_ci xor %eax, %eax 7268c2ecf20Sopenharmony_ci_read_next_byte_\@: 7278c2ecf20Sopenharmony_ci shl $8, %rax 7288c2ecf20Sopenharmony_ci mov 7(\DPTR, \DLEN, 1), %al 7298c2ecf20Sopenharmony_ci dec \DLEN 7308c2ecf20Sopenharmony_ci jnz _read_next_byte_\@ 7318c2ecf20Sopenharmony_ci vpinsrq 
$1, %rax, \XMMDst, \XMMDst 7328c2ecf20Sopenharmony_ci jmp _done_read_partial_block_\@ 7338c2ecf20Sopenharmony_ci_read_lt8_\@: 7348c2ecf20Sopenharmony_ci xor %eax, %eax 7358c2ecf20Sopenharmony_ci_read_next_byte_lt8_\@: 7368c2ecf20Sopenharmony_ci shl $8, %rax 7378c2ecf20Sopenharmony_ci mov -1(\DPTR, \DLEN, 1), %al 7388c2ecf20Sopenharmony_ci dec \DLEN 7398c2ecf20Sopenharmony_ci jnz _read_next_byte_lt8_\@ 7408c2ecf20Sopenharmony_ci vpinsrq $0, %rax, \XMMDst, \XMMDst 7418c2ecf20Sopenharmony_ci_done_read_partial_block_\@: 7428c2ecf20Sopenharmony_ci.endm 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks 7458c2ecf20Sopenharmony_ci# between update calls. 7468c2ecf20Sopenharmony_ci# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK 7478c2ecf20Sopenharmony_ci# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context 7488c2ecf20Sopenharmony_ci# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 7498c2ecf20Sopenharmony_ci.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ 7508c2ecf20Sopenharmony_ci AAD_HASH ENC_DEC 7518c2ecf20Sopenharmony_ci mov PBlockLen(arg2), %r13 7528c2ecf20Sopenharmony_ci test %r13, %r13 7538c2ecf20Sopenharmony_ci je _partial_block_done_\@ # Leave Macro if no partial blocks 7548c2ecf20Sopenharmony_ci # Read in input data without over reading 7558c2ecf20Sopenharmony_ci cmp $16, \PLAIN_CYPH_LEN 7568c2ecf20Sopenharmony_ci jl _fewer_than_16_bytes_\@ 7578c2ecf20Sopenharmony_ci vmovdqu (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm 7588c2ecf20Sopenharmony_ci jmp _data_read_\@ 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci_fewer_than_16_bytes_\@: 7618c2ecf20Sopenharmony_ci lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 7628c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r12 7638c2ecf20Sopenharmony_ci READ_PARTIAL_BLOCK %r10 %r12 %xmm1 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci mov 
PBlockLen(arg2), %r13 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci_data_read_\@: # Finished reading in data 7688c2ecf20Sopenharmony_ci 7698c2ecf20Sopenharmony_ci vmovdqu PBlockEncKey(arg2), %xmm9 7708c2ecf20Sopenharmony_ci vmovdqu HashKey(arg2), %xmm13 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci lea SHIFT_MASK(%rip), %r12 7738c2ecf20Sopenharmony_ci 7748c2ecf20Sopenharmony_ci # adjust the shuffle mask pointer to be able to shift r13 bytes 7758c2ecf20Sopenharmony_ci # r16-r13 is the number of bytes in plaintext mod 16) 7768c2ecf20Sopenharmony_ci add %r13, %r12 7778c2ecf20Sopenharmony_ci vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask 7788c2ecf20Sopenharmony_ci vpshufb %xmm2, %xmm9, %xmm9 # shift right r13 bytes 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_ci.if \ENC_DEC == DEC 7818c2ecf20Sopenharmony_ci vmovdqa %xmm1, %xmm3 7828c2ecf20Sopenharmony_ci pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) 7838c2ecf20Sopenharmony_ci 7848c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r10 7858c2ecf20Sopenharmony_ci add %r13, %r10 7868c2ecf20Sopenharmony_ci # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling 7878c2ecf20Sopenharmony_ci sub $16, %r10 7888c2ecf20Sopenharmony_ci # Determine if if partial block is not being filled and 7898c2ecf20Sopenharmony_ci # shift mask accordingly 7908c2ecf20Sopenharmony_ci jge _no_extra_mask_1_\@ 7918c2ecf20Sopenharmony_ci sub %r10, %r12 7928c2ecf20Sopenharmony_ci_no_extra_mask_1_\@: 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 7958c2ecf20Sopenharmony_ci # get the appropriate mask to mask out bottom r13 bytes of xmm9 7968c2ecf20Sopenharmony_ci vpand %xmm1, %xmm9, %xmm9 # mask out bottom r13 bytes of xmm9 7978c2ecf20Sopenharmony_ci 7988c2ecf20Sopenharmony_ci vpand %xmm1, %xmm3, %xmm3 7998c2ecf20Sopenharmony_ci vmovdqa SHUF_MASK(%rip), %xmm10 8008c2ecf20Sopenharmony_ci vpshufb %xmm10, %xmm3, %xmm3 8018c2ecf20Sopenharmony_ci vpshufb %xmm2, %xmm3, %xmm3 
8028c2ecf20Sopenharmony_ci vpxor %xmm3, \AAD_HASH, \AAD_HASH 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci test %r10, %r10 8058c2ecf20Sopenharmony_ci jl _partial_incomplete_1_\@ 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci # GHASH computation for the last <16 Byte block 8088c2ecf20Sopenharmony_ci \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 8098c2ecf20Sopenharmony_ci xor %eax,%eax 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci mov %rax, PBlockLen(arg2) 8128c2ecf20Sopenharmony_ci jmp _dec_done_\@ 8138c2ecf20Sopenharmony_ci_partial_incomplete_1_\@: 8148c2ecf20Sopenharmony_ci add \PLAIN_CYPH_LEN, PBlockLen(arg2) 8158c2ecf20Sopenharmony_ci_dec_done_\@: 8168c2ecf20Sopenharmony_ci vmovdqu \AAD_HASH, AadHash(arg2) 8178c2ecf20Sopenharmony_ci.else 8188c2ecf20Sopenharmony_ci vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r10 8218c2ecf20Sopenharmony_ci add %r13, %r10 8228c2ecf20Sopenharmony_ci # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling 8238c2ecf20Sopenharmony_ci sub $16, %r10 8248c2ecf20Sopenharmony_ci # Determine if if partial block is not being filled and 8258c2ecf20Sopenharmony_ci # shift mask accordingly 8268c2ecf20Sopenharmony_ci jge _no_extra_mask_2_\@ 8278c2ecf20Sopenharmony_ci sub %r10, %r12 8288c2ecf20Sopenharmony_ci_no_extra_mask_2_\@: 8298c2ecf20Sopenharmony_ci 8308c2ecf20Sopenharmony_ci vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 8318c2ecf20Sopenharmony_ci # get the appropriate mask to mask out bottom r13 bytes of xmm9 8328c2ecf20Sopenharmony_ci vpand %xmm1, %xmm9, %xmm9 8338c2ecf20Sopenharmony_ci 8348c2ecf20Sopenharmony_ci vmovdqa SHUF_MASK(%rip), %xmm1 8358c2ecf20Sopenharmony_ci vpshufb %xmm1, %xmm9, %xmm9 8368c2ecf20Sopenharmony_ci vpshufb %xmm2, %xmm9, %xmm9 8378c2ecf20Sopenharmony_ci vpxor %xmm9, \AAD_HASH, \AAD_HASH 8388c2ecf20Sopenharmony_ci 8398c2ecf20Sopenharmony_ci test %r10, %r10 8408c2ecf20Sopenharmony_ci jl 
_partial_incomplete_2_\@ 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ci # GHASH computation for the last <16 Byte block 8438c2ecf20Sopenharmony_ci \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 8448c2ecf20Sopenharmony_ci xor %eax,%eax 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_ci mov %rax, PBlockLen(arg2) 8478c2ecf20Sopenharmony_ci jmp _encode_done_\@ 8488c2ecf20Sopenharmony_ci_partial_incomplete_2_\@: 8498c2ecf20Sopenharmony_ci add \PLAIN_CYPH_LEN, PBlockLen(arg2) 8508c2ecf20Sopenharmony_ci_encode_done_\@: 8518c2ecf20Sopenharmony_ci vmovdqu \AAD_HASH, AadHash(arg2) 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci vmovdqa SHUF_MASK(%rip), %xmm10 8548c2ecf20Sopenharmony_ci # shuffle xmm9 back to output as ciphertext 8558c2ecf20Sopenharmony_ci vpshufb %xmm10, %xmm9, %xmm9 8568c2ecf20Sopenharmony_ci vpshufb %xmm2, %xmm9, %xmm9 8578c2ecf20Sopenharmony_ci.endif 8588c2ecf20Sopenharmony_ci # output encrypted Bytes 8598c2ecf20Sopenharmony_ci test %r10, %r10 8608c2ecf20Sopenharmony_ci jl _partial_fill_\@ 8618c2ecf20Sopenharmony_ci mov %r13, %r12 8628c2ecf20Sopenharmony_ci mov $16, %r13 8638c2ecf20Sopenharmony_ci # Set r13 to be the number of bytes to write out 8648c2ecf20Sopenharmony_ci sub %r12, %r13 8658c2ecf20Sopenharmony_ci jmp _count_set_\@ 8668c2ecf20Sopenharmony_ci_partial_fill_\@: 8678c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r13 8688c2ecf20Sopenharmony_ci_count_set_\@: 8698c2ecf20Sopenharmony_ci vmovdqa %xmm9, %xmm0 8708c2ecf20Sopenharmony_ci vmovq %xmm0, %rax 8718c2ecf20Sopenharmony_ci cmp $8, %r13 8728c2ecf20Sopenharmony_ci jle _less_than_8_bytes_left_\@ 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) 8758c2ecf20Sopenharmony_ci add $8, \DATA_OFFSET 8768c2ecf20Sopenharmony_ci psrldq $8, %xmm0 8778c2ecf20Sopenharmony_ci vmovq %xmm0, %rax 8788c2ecf20Sopenharmony_ci sub $8, %r13 8798c2ecf20Sopenharmony_ci_less_than_8_bytes_left_\@: 8808c2ecf20Sopenharmony_ci movb %al, 
(\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) 8818c2ecf20Sopenharmony_ci add $1, \DATA_OFFSET 8828c2ecf20Sopenharmony_ci shr $8, %rax 8838c2ecf20Sopenharmony_ci sub $1, %r13 8848c2ecf20Sopenharmony_ci jne _less_than_8_bytes_left_\@ 8858c2ecf20Sopenharmony_ci_partial_block_done_\@: 8868c2ecf20Sopenharmony_ci.endm # PARTIAL_BLOCK 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci############################################################################### 8898c2ecf20Sopenharmony_ci# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) 8908c2ecf20Sopenharmony_ci# Input: A and B (128-bits each, bit-reflected) 8918c2ecf20Sopenharmony_ci# Output: C = A*B*x mod poly, (i.e. >>1 ) 8928c2ecf20Sopenharmony_ci# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input 8938c2ecf20Sopenharmony_ci# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. 8948c2ecf20Sopenharmony_ci############################################################################### 8958c2ecf20Sopenharmony_ci.macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_ci vpshufd $0b01001110, \GH, \T2 8988c2ecf20Sopenharmony_ci vpshufd $0b01001110, \HK, \T3 8998c2ecf20Sopenharmony_ci vpxor \GH , \T2, \T2 # T2 = (a1+a0) 9008c2ecf20Sopenharmony_ci vpxor \HK , \T3, \T3 # T3 = (b1+b0) 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1 9038c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0 9048c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0) 9058c2ecf20Sopenharmony_ci vpxor \GH, \T2,\T2 9068c2ecf20Sopenharmony_ci vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0 9078c2ecf20Sopenharmony_ci 9088c2ecf20Sopenharmony_ci vpslldq $8, \T2,\T3 # shift-L T3 2 DWs 9098c2ecf20Sopenharmony_ci vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs 9108c2ecf20Sopenharmony_ci vpxor \T3, \GH, \GH 9118c2ecf20Sopenharmony_ci vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK 9128c2ecf20Sopenharmony_ci 
9138c2ecf20Sopenharmony_ci #first phase of the reduction 9148c2ecf20Sopenharmony_ci vpslld $31, \GH, \T2 # packed right shifting << 31 9158c2ecf20Sopenharmony_ci vpslld $30, \GH, \T3 # packed right shifting shift << 30 9168c2ecf20Sopenharmony_ci vpslld $25, \GH, \T4 # packed right shifting shift << 25 9178c2ecf20Sopenharmony_ci 9188c2ecf20Sopenharmony_ci vpxor \T3, \T2, \T2 # xor the shifted versions 9198c2ecf20Sopenharmony_ci vpxor \T4, \T2, \T2 9208c2ecf20Sopenharmony_ci 9218c2ecf20Sopenharmony_ci vpsrldq $4, \T2, \T5 # shift-R T5 1 DW 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_ci vpslldq $12, \T2, \T2 # shift-L T2 3 DWs 9248c2ecf20Sopenharmony_ci vpxor \T2, \GH, \GH # first phase of the reduction complete 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci #second phase of the reduction 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci vpsrld $1,\GH, \T2 # packed left shifting >> 1 9298c2ecf20Sopenharmony_ci vpsrld $2,\GH, \T3 # packed left shifting >> 2 9308c2ecf20Sopenharmony_ci vpsrld $7,\GH, \T4 # packed left shifting >> 7 9318c2ecf20Sopenharmony_ci vpxor \T3, \T2, \T2 # xor the shifted versions 9328c2ecf20Sopenharmony_ci vpxor \T4, \T2, \T2 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_ci vpxor \T5, \T2, \T2 9358c2ecf20Sopenharmony_ci vpxor \T2, \GH, \GH 9368c2ecf20Sopenharmony_ci vpxor \T1, \GH, \GH # the result is in GH 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci.endm 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6 9428c2ecf20Sopenharmony_ci 9438c2ecf20Sopenharmony_ci # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i 9448c2ecf20Sopenharmony_ci vmovdqa \HK, \T5 9458c2ecf20Sopenharmony_ci 9468c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9478c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 9488c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_k(arg2) 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci GHASH_MUL_AVX \T5, \HK, \T1, \T3, 
\T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly 9518c2ecf20Sopenharmony_ci vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly 9528c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9538c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 9548c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_2_k(arg2) 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly 9578c2ecf20Sopenharmony_ci vmovdqu \T5, HashKey_3(arg2) 9588c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9598c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 9608c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_3_k(arg2) 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly 9638c2ecf20Sopenharmony_ci vmovdqu \T5, HashKey_4(arg2) 9648c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9658c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 9668c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_4_k(arg2) 9678c2ecf20Sopenharmony_ci 9688c2ecf20Sopenharmony_ci GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly 9698c2ecf20Sopenharmony_ci vmovdqu \T5, HashKey_5(arg2) 9708c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9718c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 9728c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_5_k(arg2) 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_ci GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly 9758c2ecf20Sopenharmony_ci vmovdqu \T5, HashKey_6(arg2) 9768c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9778c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 9788c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_6_k(arg2) 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly 9818c2ecf20Sopenharmony_ci vmovdqu \T5, HashKey_7(arg2) 9828c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9838c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 
9848c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_7_k(arg2) 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly 9878c2ecf20Sopenharmony_ci vmovdqu \T5, HashKey_8(arg2) 9888c2ecf20Sopenharmony_ci vpshufd $0b01001110, \T5, \T1 9898c2ecf20Sopenharmony_ci vpxor \T5, \T1, \T1 9908c2ecf20Sopenharmony_ci vmovdqu \T1, HashKey_8_k(arg2) 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_ci.endm 9938c2ecf20Sopenharmony_ci 9948c2ecf20Sopenharmony_ci## if a = number of total plaintext bytes 9958c2ecf20Sopenharmony_ci## b = floor(a/16) 9968c2ecf20Sopenharmony_ci## num_initial_blocks = b mod 4# 9978c2ecf20Sopenharmony_ci## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext 9988c2ecf20Sopenharmony_ci## r10, r11, r12, rax are clobbered 9998c2ecf20Sopenharmony_ci## arg1, arg3, arg4, r14 are used as a pointer only, not modified 10008c2ecf20Sopenharmony_ci 10018c2ecf20Sopenharmony_ci.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC 10028c2ecf20Sopenharmony_ci i = (8-\num_initial_blocks) 10038c2ecf20Sopenharmony_ci setreg 10048c2ecf20Sopenharmony_ci vmovdqu AadHash(arg2), reg_i 10058c2ecf20Sopenharmony_ci 10068c2ecf20Sopenharmony_ci # start AES for num_initial_blocks blocks 10078c2ecf20Sopenharmony_ci vmovdqu CurCount(arg2), \CTR 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ci i = (9-\num_initial_blocks) 10108c2ecf20Sopenharmony_ci setreg 10118c2ecf20Sopenharmony_ci.rep \num_initial_blocks 10128c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 10138c2ecf20Sopenharmony_ci vmovdqa \CTR, reg_i 10148c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap 10158c2ecf20Sopenharmony_ci i = (i+1) 10168c2ecf20Sopenharmony_ci setreg 10178c2ecf20Sopenharmony_ci.endr 10188c2ecf20Sopenharmony_ci 10198c2ecf20Sopenharmony_ci vmovdqa (arg1), \T_key 10208c2ecf20Sopenharmony_ci 
i = (9-\num_initial_blocks) 10218c2ecf20Sopenharmony_ci setreg 10228c2ecf20Sopenharmony_ci.rep \num_initial_blocks 10238c2ecf20Sopenharmony_ci vpxor \T_key, reg_i, reg_i 10248c2ecf20Sopenharmony_ci i = (i+1) 10258c2ecf20Sopenharmony_ci setreg 10268c2ecf20Sopenharmony_ci.endr 10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_ci j = 1 10298c2ecf20Sopenharmony_ci setreg 10308c2ecf20Sopenharmony_ci.rep \REP 10318c2ecf20Sopenharmony_ci vmovdqa 16*j(arg1), \T_key 10328c2ecf20Sopenharmony_ci i = (9-\num_initial_blocks) 10338c2ecf20Sopenharmony_ci setreg 10348c2ecf20Sopenharmony_ci.rep \num_initial_blocks 10358c2ecf20Sopenharmony_ci vaesenc \T_key, reg_i, reg_i 10368c2ecf20Sopenharmony_ci i = (i+1) 10378c2ecf20Sopenharmony_ci setreg 10388c2ecf20Sopenharmony_ci.endr 10398c2ecf20Sopenharmony_ci 10408c2ecf20Sopenharmony_ci j = (j+1) 10418c2ecf20Sopenharmony_ci setreg 10428c2ecf20Sopenharmony_ci.endr 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_ci vmovdqa 16*j(arg1), \T_key 10458c2ecf20Sopenharmony_ci i = (9-\num_initial_blocks) 10468c2ecf20Sopenharmony_ci setreg 10478c2ecf20Sopenharmony_ci.rep \num_initial_blocks 10488c2ecf20Sopenharmony_ci vaesenclast \T_key, reg_i, reg_i 10498c2ecf20Sopenharmony_ci i = (i+1) 10508c2ecf20Sopenharmony_ci setreg 10518c2ecf20Sopenharmony_ci.endr 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_ci i = (9-\num_initial_blocks) 10548c2ecf20Sopenharmony_ci setreg 10558c2ecf20Sopenharmony_ci.rep \num_initial_blocks 10568c2ecf20Sopenharmony_ci vmovdqu (arg4, %r11), \T1 10578c2ecf20Sopenharmony_ci vpxor \T1, reg_i, reg_i 10588c2ecf20Sopenharmony_ci vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for num_initial_blocks blocks 10598c2ecf20Sopenharmony_ci add $16, %r11 10608c2ecf20Sopenharmony_ci.if \ENC_DEC == DEC 10618c2ecf20Sopenharmony_ci vmovdqa \T1, reg_i 10628c2ecf20Sopenharmony_ci.endif 10638c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations 10648c2ecf20Sopenharmony_ci i = 
(i+1) 10658c2ecf20Sopenharmony_ci setreg 10668c2ecf20Sopenharmony_ci.endr 10678c2ecf20Sopenharmony_ci 10688c2ecf20Sopenharmony_ci 10698c2ecf20Sopenharmony_ci i = (8-\num_initial_blocks) 10708c2ecf20Sopenharmony_ci j = (9-\num_initial_blocks) 10718c2ecf20Sopenharmony_ci setreg 10728c2ecf20Sopenharmony_ci 10738c2ecf20Sopenharmony_ci.rep \num_initial_blocks 10748c2ecf20Sopenharmony_ci vpxor reg_i, reg_j, reg_j 10758c2ecf20Sopenharmony_ci GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks 10768c2ecf20Sopenharmony_ci i = (i+1) 10778c2ecf20Sopenharmony_ci j = (j+1) 10788c2ecf20Sopenharmony_ci setreg 10798c2ecf20Sopenharmony_ci.endr 10808c2ecf20Sopenharmony_ci # XMM8 has the combined result here 10818c2ecf20Sopenharmony_ci 10828c2ecf20Sopenharmony_ci vmovdqa \XMM8, TMP1(%rsp) 10838c2ecf20Sopenharmony_ci vmovdqa \XMM8, \T3 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci cmp $128, %r13 10868c2ecf20Sopenharmony_ci jl _initial_blocks_done\@ # no need for precomputed constants 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_ci############################################################################### 10898c2ecf20Sopenharmony_ci# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i 10908c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 10918c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM1 10928c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap 10938c2ecf20Sopenharmony_ci 10948c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 10958c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM2 10968c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap 10978c2ecf20Sopenharmony_ci 10988c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 10998c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM3 11008c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap 11018c2ecf20Sopenharmony_ci 11028c2ecf20Sopenharmony_ci vpaddd 
ONE(%rip), \CTR, \CTR # INCR Y0 11038c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM4 11048c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap 11058c2ecf20Sopenharmony_ci 11068c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 11078c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM5 11088c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 11118c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM6 11128c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 11158c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM7 11168c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 11198c2ecf20Sopenharmony_ci vmovdqa \CTR, \XMM8 11208c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci vmovdqa (arg1), \T_key 11238c2ecf20Sopenharmony_ci vpxor \T_key, \XMM1, \XMM1 11248c2ecf20Sopenharmony_ci vpxor \T_key, \XMM2, \XMM2 11258c2ecf20Sopenharmony_ci vpxor \T_key, \XMM3, \XMM3 11268c2ecf20Sopenharmony_ci vpxor \T_key, \XMM4, \XMM4 11278c2ecf20Sopenharmony_ci vpxor \T_key, \XMM5, \XMM5 11288c2ecf20Sopenharmony_ci vpxor \T_key, \XMM6, \XMM6 11298c2ecf20Sopenharmony_ci vpxor \T_key, \XMM7, \XMM7 11308c2ecf20Sopenharmony_ci vpxor \T_key, \XMM8, \XMM8 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci i = 1 11338c2ecf20Sopenharmony_ci setreg 11348c2ecf20Sopenharmony_ci.rep \REP # do REP rounds 11358c2ecf20Sopenharmony_ci vmovdqa 16*i(arg1), \T_key 11368c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM1, \XMM1 11378c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM2, \XMM2 11388c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM3, \XMM3 
11398c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM4, \XMM4 11408c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM5, \XMM5 11418c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM6, \XMM6 11428c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM7, \XMM7 11438c2ecf20Sopenharmony_ci vaesenc \T_key, \XMM8, \XMM8 11448c2ecf20Sopenharmony_ci i = (i+1) 11458c2ecf20Sopenharmony_ci setreg 11468c2ecf20Sopenharmony_ci.endr 11478c2ecf20Sopenharmony_ci 11488c2ecf20Sopenharmony_ci vmovdqa 16*i(arg1), \T_key 11498c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM1, \XMM1 11508c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM2, \XMM2 11518c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM3, \XMM3 11528c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM4, \XMM4 11538c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM5, \XMM5 11548c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM6, \XMM6 11558c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM7, \XMM7 11568c2ecf20Sopenharmony_ci vaesenclast \T_key, \XMM8, \XMM8 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci vmovdqu (arg4, %r11), \T1 11598c2ecf20Sopenharmony_ci vpxor \T1, \XMM1, \XMM1 11608c2ecf20Sopenharmony_ci vmovdqu \XMM1, (arg3 , %r11) 11618c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 11628c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM1 11638c2ecf20Sopenharmony_ci .endif 11648c2ecf20Sopenharmony_ci 11658c2ecf20Sopenharmony_ci vmovdqu 16*1(arg4, %r11), \T1 11668c2ecf20Sopenharmony_ci vpxor \T1, \XMM2, \XMM2 11678c2ecf20Sopenharmony_ci vmovdqu \XMM2, 16*1(arg3 , %r11) 11688c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 11698c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM2 11708c2ecf20Sopenharmony_ci .endif 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci vmovdqu 16*2(arg4, %r11), \T1 11738c2ecf20Sopenharmony_ci vpxor \T1, \XMM3, \XMM3 11748c2ecf20Sopenharmony_ci vmovdqu \XMM3, 16*2(arg3 , %r11) 11758c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 11768c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM3 11778c2ecf20Sopenharmony_ci .endif 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci 
vmovdqu 16*3(arg4, %r11), \T1 11808c2ecf20Sopenharmony_ci vpxor \T1, \XMM4, \XMM4 11818c2ecf20Sopenharmony_ci vmovdqu \XMM4, 16*3(arg3 , %r11) 11828c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 11838c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM4 11848c2ecf20Sopenharmony_ci .endif 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ci vmovdqu 16*4(arg4, %r11), \T1 11878c2ecf20Sopenharmony_ci vpxor \T1, \XMM5, \XMM5 11888c2ecf20Sopenharmony_ci vmovdqu \XMM5, 16*4(arg3 , %r11) 11898c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 11908c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM5 11918c2ecf20Sopenharmony_ci .endif 11928c2ecf20Sopenharmony_ci 11938c2ecf20Sopenharmony_ci vmovdqu 16*5(arg4, %r11), \T1 11948c2ecf20Sopenharmony_ci vpxor \T1, \XMM6, \XMM6 11958c2ecf20Sopenharmony_ci vmovdqu \XMM6, 16*5(arg3 , %r11) 11968c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 11978c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM6 11988c2ecf20Sopenharmony_ci .endif 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci vmovdqu 16*6(arg4, %r11), \T1 12018c2ecf20Sopenharmony_ci vpxor \T1, \XMM7, \XMM7 12028c2ecf20Sopenharmony_ci vmovdqu \XMM7, 16*6(arg3 , %r11) 12038c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 12048c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM7 12058c2ecf20Sopenharmony_ci .endif 12068c2ecf20Sopenharmony_ci 12078c2ecf20Sopenharmony_ci vmovdqu 16*7(arg4, %r11), \T1 12088c2ecf20Sopenharmony_ci vpxor \T1, \XMM8, \XMM8 12098c2ecf20Sopenharmony_ci vmovdqu \XMM8, 16*7(arg3 , %r11) 12108c2ecf20Sopenharmony_ci .if \ENC_DEC == DEC 12118c2ecf20Sopenharmony_ci vmovdqa \T1, \XMM8 12128c2ecf20Sopenharmony_ci .endif 12138c2ecf20Sopenharmony_ci 12148c2ecf20Sopenharmony_ci add $128, %r11 12158c2ecf20Sopenharmony_ci 12168c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap 12178c2ecf20Sopenharmony_ci vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext 12188c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap 
# byte-swap the remaining blocks of this batch
        vpshufb         SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap

###############################################################################

_initial_blocks_done\@:

.endm

# GHASH_8_ENCRYPT_8_PARALLEL_AVX
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
# arg1, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
#
# Arguments:
#   REP        number of vaesenc rounds: rounds 1..9 are written out, the
#              remaining REP-9 come from the .rep loop; the vaesenclast round
#              (key at 16*(REP+1)(arg1)) is extra
#   T1..T7     scratch registers; T6 holds the reduced GHASH value at the end
#   CTR        counter block; left holding the last counter value generated
#   XMM1..XMM8 on entry: the 8 blocks to be hashed
#              on exit:  the 8 new blocks after a 16-byte swap, with the
#                        reduced GHASH value XORed into XMM1
#   loop_idx   in_order: counters are incremented with ONE and then byte
#              swapped; any other value: incremented with ONEf, no swap
#   ENC_DEC    ENC or DEC, selects how the output is stored (see below)
# Memory: round keys are read from arg1, hash key powers from arg2,
# input from arg4 + r11, output goes to arg3 + r11; TMP2..TMP8(%rsp) are
# used as spill slots.
# NOTE(review): reg_j is produced by setreg, which is defined elsewhere;
# presumably it names the j-th block register - confirm.
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC

        # keep the 8 blocks to be hashed: first one in T2, the rest on the stack
        vmovdqa         \XMM1, \T2
        vmovdqa         \XMM2, TMP2(%rsp)
        vmovdqa         \XMM3, TMP3(%rsp)
        vmovdqa         \XMM4, TMP4(%rsp)
        vmovdqa         \XMM5, TMP5(%rsp)
        vmovdqa         \XMM6, TMP6(%rsp)
        vmovdqa         \XMM7, TMP7(%rsp)
        vmovdqa         \XMM8, TMP8(%rsp)

.if \loop_idx == in_order
        vpaddd          ONE(%rip), \CTR, \XMM1          # INCR CNT
        vpaddd          ONE(%rip), \XMM1, \XMM2
        vpaddd          ONE(%rip), \XMM2, \XMM3
        vpaddd          ONE(%rip), \XMM3, \XMM4
        vpaddd          ONE(%rip), \XMM4, \XMM5
        vpaddd          ONE(%rip), \XMM5, \XMM6
        vpaddd          ONE(%rip), \XMM6, \XMM7
        vpaddd          ONE(%rip), \XMM7, \XMM8
        vmovdqa         \XMM8, \CTR                     # CTR = last counter, before the swap

        vpshufb         SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap
.else
        vpaddd          ONEf(%rip), \CTR, \XMM1         # INCR CNT
        vpaddd          ONEf(%rip), \XMM1, \XMM2
        vpaddd          ONEf(%rip), \XMM2, \XMM3
        vpaddd          ONEf(%rip), \XMM3, \XMM4
        vpaddd          ONEf(%rip), \XMM4, \XMM5
        vpaddd          ONEf(%rip), \XMM5, \XMM6
        vpaddd          ONEf(%rip), \XMM6, \XMM7
        vpaddd          ONEf(%rip), \XMM7, \XMM8
        vmovdqa         \XMM8, \CTR
.endif


        #######################################################################

        # XOR the key at offset 0 of arg1 into all 8 counter blocks
        vmovdqu         (arg1), \T1
        vpxor           \T1, \XMM1, \XMM1
        vpxor           \T1, \XMM2, \XMM2
        vpxor           \T1, \XMM3, \XMM3
        vpxor           \T1, \XMM4, \XMM4
        vpxor           \T1, \XMM5, \XMM5
        vpxor           \T1, \XMM6, \XMM6
        vpxor           \T1, \XMM7, \XMM7
        vpxor           \T1, \XMM8, \XMM8

        #######################################################################





        # AES round 1
        vmovdqu         16*1(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8

        # AES round 2
        vmovdqu         16*2(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8


        #######################################################################

        # saved block 1 (in T2) times HashKey_8; this starts the three
        # accumulators: T4 (high product), T7 (low product), T6 (middle
        # product; HashKey_8_k presumably holds the XORed key halves - confirm)
        vmovdqu         HashKey_8(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T2, \T4            # T4 = a1*b1
        vpclmulqdq      $0x00, \T5, \T2, \T7            # T7 = a0*b0

        vpshufd         $0b01001110, \T2, \T6
        vpxor           \T2, \T6, \T6

        vmovdqu         HashKey_8_k(arg2), \T5
        vpclmulqdq      $0x00, \T5, \T6, \T6

        # AES round 3
        vmovdqu         16*3(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8

        # saved block 2 times HashKey_7, accumulated into T4 / T7 / T6
        vmovdqa         TMP2(%rsp), \T1
        vmovdqu         HashKey_7(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpshufd         $0b01001110, \T1, \T3
        vpxor           \T1, \T3, \T3
        vmovdqu         HashKey_7_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor           \T3, \T6, \T6

        # AES round 4
        vmovdqu         16*4(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8

        #######################################################################

        # saved block 3 times HashKey_6
        vmovdqa         TMP3(%rsp), \T1
        vmovdqu         HashKey_6(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpshufd         $0b01001110, \T1, \T3
        vpxor           \T1, \T3, \T3
        vmovdqu         HashKey_6_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor           \T3, \T6, \T6

        # AES round 5
        vmovdqu         16*5(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8

        # saved block 4 times HashKey_5
        vmovdqa         TMP4(%rsp), \T1
        vmovdqu         HashKey_5(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpshufd         $0b01001110, \T1, \T3
        vpxor           \T1, \T3, \T3
        vmovdqu         HashKey_5_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor           \T3, \T6, \T6

        # AES round 6
        vmovdqu         16*6(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8


        # saved block 5 times HashKey_4
        vmovdqa         TMP5(%rsp), \T1
        vmovdqu         HashKey_4(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpshufd         $0b01001110, \T1, \T3
        vpxor           \T1, \T3, \T3
        vmovdqu         HashKey_4_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor           \T3, \T6, \T6

        # AES round 7
        vmovdqu         16*7(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8

        # saved block 6 times HashKey_3
        vmovdqa         TMP6(%rsp), \T1
        vmovdqu         HashKey_3(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpshufd         $0b01001110, \T1, \T3
        vpxor           \T1, \T3, \T3
        vmovdqu         HashKey_3_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor           \T3, \T6, \T6


        # AES round 8
        vmovdqu         16*8(arg1), \T1
        vaesenc         \T1, \XMM1, \XMM1
        vaesenc         \T1, \XMM2, \XMM2
        vaesenc         \T1, \XMM3, \XMM3
        vaesenc         \T1, \XMM4, \XMM4
        vaesenc         \T1, \XMM5, \XMM5
        vaesenc         \T1, \XMM6, \XMM6
        vaesenc         \T1, \XMM7, \XMM7
        vaesenc         \T1, \XMM8, \XMM8

        # saved block 7 times HashKey_2
        vmovdqa         TMP7(%rsp), \T1
        vmovdqu         HashKey_2(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpshufd         $0b01001110, \T1, \T3
        vpxor           \T1, \T3, \T3
        vmovdqu         HashKey_2_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor           \T3, \T6, \T6

        #######################################################################

        # AES round 9 (key goes through T5 here, T1 is about to be reused)
        vmovdqu         16*9(arg1), \T5
        vaesenc         \T5, \XMM1, \XMM1
        vaesenc         \T5, \XMM2, \XMM2
        vaesenc         \T5, \XMM3, \XMM3
        vaesenc         \T5, \XMM4, \XMM4
        vaesenc         \T5, \XMM5, \XMM5
        vaesenc         \T5, \XMM6, \XMM6
        vaesenc         \T5, \XMM7, \XMM7
        vaesenc         \T5, \XMM8, \XMM8

        # saved block 8 times HashKey
        vmovdqa         TMP8(%rsp), \T1
        vmovdqu         HashKey(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpshufd         $0b01001110, \T1, \T3
        vpxor           \T1, \T3, \T3
        vmovdqu         HashKey_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor           \T3, \T6, \T6

        # fold the high and low sums into the middle term
        vpxor           \T4, \T6, \T6
        vpxor           \T7, \T6, \T6

        # T5 = key 10; each pass of the loop below applies the key held in T5
        # and then loads the next one, so T5 ends up holding the key at
        # 16*(REP+1)(arg1), which is consumed by vaesenclast
        vmovdqu         16*10(arg1), \T5

        i = 11
        setreg
.rep (\REP-9)

        vaesenc         \T5, \XMM1, \XMM1
        vaesenc         \T5, \XMM2, \XMM2
        vaesenc         \T5, \XMM3, \XMM3
        vaesenc         \T5, \XMM4, \XMM4
        vaesenc         \T5, \XMM5, \XMM5
        vaesenc         \T5, \XMM6, \XMM6
        vaesenc         \T5, \XMM7, \XMM7
        vaesenc         \T5, \XMM8, \XMM8

        vmovdqu         16*i(arg1), \T5
        i = i + 1
        setreg
.endr

        # last round: the input block is XORed into the last round key first,
        # so vaesenclast directly yields (encrypted counter) XOR (input block)
        i = 0
        j = 1
        setreg
.rep 8
        vpxor           16*i(arg4, %r11), \T5, \T2
        .if \ENC_DEC == ENC
        vaesenclast     \T2, reg_j, reg_j
        .else
        # DEC: result goes to T3 and is stored right away; reg_j is reloaded
        # with the input block (the load precedes the store to arg3)
        vaesenclast     \T2, reg_j, \T3
        vmovdqu         16*i(arg4, %r11), reg_j
        vmovdqu         \T3, 16*i(arg3, %r11)
        .endif
        i = (i+1)
        j = (j+1)
        setreg
.endr
        #######################################################################


        vpslldq         $8, \T6, \T3                    # T3 = T6 shifted left by 2 DWs
        vpsrldq         $8, \T6, \T6                    # T6 = T6 shifted right by 2 DWs
        vpxor           \T3, \T7, \T7
        vpxor           \T4, \T6, \T6                   # accumulate the results in T6:T7



        #######################################################################
        #first phase of the reduction
        #######################################################################
        vpslld          $31, \T7, \T2                   # packed left shift of each DW: << 31
        vpslld          $30, \T7, \T3                   # packed left shift of each DW: << 30
        vpslld          $25, \T7, \T4                   # packed left shift of each DW: << 25

        vpxor           \T3, \T2, \T2                   # xor the shifted versions
        vpxor           \T4, \T2, \T2

        vpsrldq         $4, \T2, \T1                    # T1 = T2 shifted right by 1 DW

        vpslldq         $12, \T2, \T2                   # shift-L T2 3 DWs
        vpxor           \T2, \T7, \T7                   # first phase of the reduction complete
        #######################################################################
        .if \ENC_DEC == ENC
        vmovdqu         \XMM1, 16*0(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu         \XMM2, 16*1(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu         \XMM3, 16*2(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu         \XMM4, 16*3(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu         \XMM5, 16*4(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu         \XMM6, 16*5(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu         \XMM7, 16*6(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu         \XMM8, 16*7(arg3,%r11)          # Write to the Ciphertext buffer
        .endif

        #######################################################################
        #second phase of the reduction
        vpsrld          $1, \T7, \T2                    # packed right shift of each DW: >> 1
        vpsrld          $2, \T7, \T3                    # packed right shift of each DW: >> 2
        vpsrld          $7, \T7, \T4                    # packed right shift of each DW: >> 7
        vpxor           \T3, \T2, \T2                   # xor the shifted versions
        vpxor           \T4, \T2, \T2

        vpxor           \T1, \T2, \T2
        vpxor           \T2, \T7, \T7
        vpxor           \T7, \T6, \T6                   # the result is in T6
        #######################################################################

        vpshufb         SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
        vpshufb         SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap


        # fold the reduced hash (T6) into the first of the new blocks
        vpxor           \T6, \XMM1, \XMM1



.endm


# GHASH the last 8 ciphertext blocks.
# XMM1..XMM8 are multiplied by HashKey_8..HashKey (read from arg2), the
# products are summed and reduced; the result is left in T6.
# XMM1 is overwritten (it accumulates the middle products) and T1..T5 and T7
# are used as scratch.
.macro  GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8

        ## Karatsuba Method
        ## T6 sums the high products, T7 the low products and XMM1 the middle
        ## products (HashKey_N_k presumably holds the XORed key halves - confirm)


        vpshufd         $0b01001110, \XMM1, \T2
        vpxor           \XMM1, \T2, \T2
        vmovdqu         HashKey_8(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM1, \T6
        vpclmulqdq      $0x00, \T5, \XMM1, \T7

        vmovdqu         HashKey_8_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \XMM1

        ######################

        vpshufd         $0b01001110, \XMM2, \T2
        vpxor           \XMM2, \T2, \T2
        vmovdqu         HashKey_7(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM2, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM2, \T4
        vpxor           \T4, \T7, \T7

        vmovdqu         HashKey_7_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor           \T2, \XMM1, \XMM1

        ######################

        vpshufd         $0b01001110, \XMM3, \T2
        vpxor           \XMM3, \T2, \T2
        vmovdqu         HashKey_6(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM3, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM3, \T4
        vpxor           \T4, \T7, \T7

        vmovdqu         HashKey_6_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor           \T2, \XMM1, \XMM1

        ######################

        vpshufd         $0b01001110, \XMM4, \T2
        vpxor           \XMM4, \T2, \T2
        vmovdqu         HashKey_5(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM4, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM4, \T4
        vpxor           \T4, \T7, \T7

        vmovdqu         HashKey_5_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor           \T2, \XMM1, \XMM1

        ######################

        vpshufd         $0b01001110, \XMM5, \T2
        vpxor           \XMM5, \T2, \T2
        vmovdqu         HashKey_4(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM5, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM5, \T4
        vpxor           \T4, \T7, \T7

        vmovdqu         HashKey_4_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor           \T2, \XMM1, \XMM1

        ######################

        vpshufd         $0b01001110, \XMM6, \T2
        vpxor           \XMM6, \T2, \T2
        vmovdqu         HashKey_3(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM6, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM6, \T4
        vpxor           \T4, \T7, \T7

        vmovdqu         HashKey_3_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor           \T2, \XMM1, \XMM1

        ######################

        vpshufd         $0b01001110, \XMM7, \T2
        vpxor           \XMM7, \T2, \T2
        vmovdqu         HashKey_2(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM7, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM7, \T4
        vpxor           \T4, \T7, \T7

        vmovdqu         HashKey_2_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor           \T2, \XMM1, \XMM1

        ######################

        vpshufd         $0b01001110, \XMM8, \T2
        vpxor           \XMM8, \T2, \T2
        vmovdqu         HashKey(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM8, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM8, \T4
        vpxor           \T4, \T7, \T7

        vmovdqu         HashKey_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2

        # fold the high and low sums into the middle term; result in T2
        vpxor           \T2, \XMM1, \XMM1
        vpxor           \T6, \XMM1, \XMM1
        vpxor           \T7, \XMM1, \T2




        vpslldq         $8, \T2, \T4
        vpsrldq         $8, \T2, \T2

        vpxor           \T4, \T7, \T7
        vpxor           \T2, \T6, \T6                   # <T6:T7> holds the result of
                                                        # the accumulated carry-less multiplications

        #######################################################################
        #first phase of the reduction
        vpslld          $31, \T7, \T2                   # packed left shift of each DW: << 31
        vpslld          $30, \T7, \T3                   # packed left shift of each DW: << 30
        vpslld          $25, \T7, \T4                   # packed left shift of each DW: << 25

        vpxor           \T3, \T2, \T2                   # xor the shifted versions
        vpxor           \T4, \T2, \T2

        vpsrldq         $4, \T2, \T1                    # T1 = T2 shifted right by 1 DW

        vpslldq         $12, \T2, \T2                   # shift-L T2 3 DWs
        vpxor           \T2, \T7, \T7                   # first phase of the reduction complete
        #######################################################################


        #second phase of the reduction
        vpsrld          $1, \T7, \T2                    # packed right shift of each DW: >> 1
        vpsrld          $2, \T7, \T3                    # packed right shift of each DW: >> 2
        vpsrld          $7, \T7, \T4                    # packed right shift of each DW: >> 7
        vpxor           \T3, \T2, \T2                   # xor the shifted versions
        vpxor           \T4, \T2, \T2

        vpxor           \T1, \T2, \T2
        vpxor           \T2, \T7, \T7
        vpxor           \T7, \T6, \T6                   # the result is in T6

.endm

#############################################################
# NOTE(review): the parameter list below is kept as originally documented;
# confirm it against the C declaration of this entry point.
#void   aesni_gcm_init_avx_gen2
#        (gcm_data     *my_ctx_data,
#         gcm_context_data *data,
#        u8     *hash_subkey, /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#                       (from Security Association) concatenated with 8 byte
#                       Initialisation Vector (from IPSec ESP Payload)
#                       concatenated with 0x00000001. 16-byte aligned pointer. */
#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
#        u64     aad_len) /* Length of AAD in bytes.
#                       With RFC4106 this is going to be 8 or 12 Bytes */
#############################################################
# Thin entry point: FUNC_SAVE and FUNC_RESTORE bracket the INIT macro, which
# is instantiated with the AVX (gen2) GHASH multiply and hash key precompute
# macros. All four macros are defined elsewhere in this file.
SYM_FUNC_START(aesni_gcm_init_avx_gen2)
        FUNC_SAVE
        INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_init_avx_gen2)

###############################################################################
#void   aesni_gcm_enc_update_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
#        const   u8 *in, /* Plaintext input */
#        u64     plaintext_len) /* Length of data in Bytes for encryption.
#                                */
###############################################################################
# Encrypt-update entry point (gen2 / AVX).
# Reads keysize, compares it with 32 and with 16, and expands GCM_ENC_DEC in
# ENC mode once per case; the last macro argument is 13, 9 or 11 respectively
# (11 being the fall-through case, commented as the 192 one).
# Each variant ends with its own FUNC_RESTORE / RET.
SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2)
        FUNC_SAVE
        mov     keysize, %eax
        cmpl    $32, %eax
        je      .Lgcm_enc_update_avx_gen2_key_256
        cmpl    $16, %eax
        je      .Lgcm_enc_update_avx_gen2_key_128
        # neither 32 nor 16: must be 192
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
        FUNC_RESTORE
        RET

.Lgcm_enc_update_avx_gen2_key_128:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
        FUNC_RESTORE
        RET

.Lgcm_enc_update_avx_gen2_key_256:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)

###############################################################################
#void   aesni_gcm_dec_update_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
#        const   u8 *in, /* Ciphertext input */
#        u64     plaintext_len) /* Length of data in Bytes for encryption.
#                                */
###############################################################################
SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2)
        # Decrypt counterpart of the enc_update entry point: same dispatch on
        # the key length in bytes, same GCM_ENC_DEC expansion, but with DEC
        # as the direction argument.  The trailing 9 / 11 / 13 is presumably
        # the count of full AES rounds before the final one -- confirm
        # against GCM_ENC_DEC.
        #
        # Branch targets use the .L prefix so they stay assembler-local.
        FUNC_SAVE
        mov     keysize, %eax
        cmp     $32, %eax
        je      .Lkey_256_dec_update
        cmp     $16, %eax
        je      .Lkey_128_dec_update
        # neither 16 nor 32 bytes: must be 192 bits
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
        FUNC_RESTORE
        RET
.Lkey_128_dec_update:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
        FUNC_RESTORE
        RET
.Lkey_256_dec_update:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)

###############################################################################
#void   aesni_gcm_finalize_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) /* Authenticated Tag Length in bytes.
#                                Valid values are 16 (most likely), 12 or 8.
#                                */
###############################################################################
SYM_FUNC_START(aesni_gcm_finalize_avx_gen2)
        # Dispatch on the key length in bytes and expand GCM_COMPLETE with
        # the gen2 GHASH multiply, a per-key-size round argument (9, 11 or
        # 13) and the third and fourth function arguments, which the header
        # above names auth_tag and auth_tag_len.
        #
        # Branch targets use the .L prefix so they stay assembler-local.
        FUNC_SAVE
        mov     keysize, %eax
        cmp     $32, %eax
        je      .Lkey_256_finalize
        cmp     $16, %eax
        je      .Lkey_128_finalize
        # neither 16 nor 32 bytes: must be 192 bits
        GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
        FUNC_RESTORE
        RET
.Lkey_128_finalize:
        GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
        FUNC_RESTORE
        RET
.Lkey_256_finalize:
        GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)

###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
# Input: A and B (128-bits each, bit-reflected)
# Output: C = A*B*x mod poly, (i.e. >>1 )
# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
###############################################################################
.macro  GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5
        # GH is both an input and the output; HK is only read.
        # T1, T2 and T3 are overwritten as scratch.  T4 and T5 are accepted
        # for signature compatibility but are not referenced in this body.

        # Four 64x64 carry-less partial products of GH and HK.
        vpclmulqdq      $0x11, \HK, \GH, \T1            # T1 = a1*b1
        vpclmulqdq      $0x00, \HK, \GH, \T2            # T2 = a0*b0
        vpclmulqdq      $0x01, \HK, \GH, \T3            # T3 = a1*b0
        vpclmulqdq      $0x10, \HK, \GH, \GH            # GH = a0*b1
        vpxor           \T3, \GH, \GH                   # GH = sum of the two middle products

        # Split the middle term across the high (T1) and low (GH) halves.
        vpsrldq         $8, \GH, \T3                    # shift-R GH 2 DWs
        vpslldq         $8, \GH, \GH                    # shift-L GH 2 DWs

        vpxor           \T3, \T1, \T1
        vpxor           \T2, \GH, \GH                   # 256-bit product is now T1:GH

        #######################################################################
        # first phase of the reduction
        vmovdqa         POLY2(%rip), \T3

        vpclmulqdq      $0x01, \GH, \T3, \T2
        vpslldq         $8, \T2, \T2                    # shift-L T2 2 DWs

        vpxor           \T2, \GH, \GH                   # first phase of the reduction complete
        #######################################################################
        # second phase of the reduction
        vpclmulqdq      $0x00, \GH, \T3, \T2
        vpsrldq         $4, \T2, \T2                    # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

        vpclmulqdq      $0x10, \GH, \T3, \GH
        vpslldq         $4, \GH, \GH                    # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts)

        vpxor           \T2, \GH, \GH                   # second phase of the reduction complete
        #######################################################################
        vpxor           \T1, \GH, \GH                   # the result is in GH

.endm

.macro  PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6
        # Starting from a copy of HK in T5, multiply by HK seven times with
        # GHASH_MUL_AVX2 and store each intermediate value at HashKey_2 ..
        # HashKey_8 relative to arg2.  Only those seven values are stored
        # here; no XOR-of-halves (HashKey_i_k) values are produced by this
        # macro, despite the comment the original carried at this spot.
        # T5 is the accumulator; T1, T3, T4, T6, T2 are handed to
        # GHASH_MUL_AVX2 as its scratch arguments, in that order.
        vmovdqa         \HK, \T5

        GHASH_MUL_AVX2  \T5, \HK, \T1, \T3, \T4, \T6, \T2      # T5 = HashKey^2<<1 mod poly
        vmovdqu         \T5, HashKey_2(arg2)                    # [HashKey_2] = HashKey^2<<1 mod poly

        GHASH_MUL_AVX2  \T5, \HK, \T1, \T3, \T4, \T6, \T2      # T5 = HashKey^3<<1 mod poly
        vmovdqu         \T5, HashKey_3(arg2)

        GHASH_MUL_AVX2  \T5, \HK, \T1, \T3, \T4, \T6, \T2      # T5 = HashKey^4<<1 mod poly
        vmovdqu         \T5, HashKey_4(arg2)

        GHASH_MUL_AVX2  \T5, \HK, \T1, \T3, \T4, \T6, \T2      # T5 = HashKey^5<<1 mod poly
        vmovdqu         \T5, HashKey_5(arg2)

        GHASH_MUL_AVX2  \T5, \HK, \T1, \T3, \T4, \T6, \T2      # T5 = HashKey^6<<1 mod poly
        vmovdqu         \T5, HashKey_6(arg2)

        GHASH_MUL_AVX2  \T5, \HK, \T1, \T3, \T4, \T6, \T2      # T5 = HashKey^7<<1 mod poly
        vmovdqu         \T5, HashKey_7(arg2)

        GHASH_MUL_AVX2  \T5, \HK, \T1, \T3, \T4, \T6, \T2      # T5 = HashKey^8<<1 mod poly
        vmovdqu         \T5, HashKey_8(arg2)

.endm

## INITIAL_BLOCKS_AVX2
## With a = number of total plaintext bytes
##      b = floor(a/16)
##      num_initial_blocks = b mod 4
## NOTE(review): the register indices below are derived from
## 8-num_initial_blocks and 9-num_initial_blocks, which suggests a range of
## 0..7 (b mod 8) rather than b mod 4 -- confirm against the caller.
##
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## When r13 is at least 128 the macro additionally encrypts the next 8 blocks
## and leaves them, byte-swapped and with the running hash folded into the
## first one, in XMM1..XMM8 (they are not hashed here).
## r10, r11, r12, rax are clobbered
## arg1, arg3, arg4, r14 are used as a pointer only, not modified
##
## i, j, reg_i and reg_j are assembly-time helpers maintained through setreg;
## their updates are kept exactly as in the original sequence.

.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
        # Load the AadHash slot of the context (presumably the hash
        # accumulated so far) into the register just below the first
        # initial-block register.
        i = (8-\num_initial_blocks)
        setreg
        vmovdqu AadHash(arg2), reg_i

        # start AES for num_initial_blocks blocks
        vmovdqu CurCount(arg2), \CTR

        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vpaddd  ONE(%rip), \CTR, \CTR                   # INCR Y0
        vmovdqa \CTR, reg_i
        vpshufb SHUF_MASK(%rip), reg_i, reg_i           # perform a 16Byte swap
        i = (i+1)
        setreg
.endr

        # Round 0: xor every block with the first round key.
        vmovdqa (arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vpxor   \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        # REP middle rounds; round key j is read from 16*j off arg1.
        j = 1
        setreg
.rep \REP
        vmovdqa 16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenc \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        j = (j+1)
        setreg
.endr

        # Final round with the key that follows the REP middle rounds.
        vmovdqa 16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenclast \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        # Xor the keystream with the input at arg4+r11, store to arg3+r11,
        # and advance r11 by one block each time.
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vmovdqu (arg4, %r11), \T1
        vpxor   \T1, reg_i, reg_i
        vmovdqu reg_i, (arg3 , %r11)                    # write back ciphertext for
                                                        # num_initial_blocks blocks
        add     $16, %r11
.if  \ENC_DEC == DEC
        vmovdqa \T1, reg_i                              # when decrypting, hash the input block
.endif
        vpshufb SHUF_MASK(%rip), reg_i, reg_i           # prepare ciphertext for GHASH computations
        i = (i+1)
        setreg
.endr

        # Chain the hash through the initial blocks: reg_j ^= reg_i, then
        # multiply reg_j by the value passed in T2.
        i = (8-\num_initial_blocks)
        j = (9-\num_initial_blocks)
        setreg

.rep \num_initial_blocks
        vpxor   reg_i, reg_j, reg_j
        GHASH_MUL_AVX2  reg_j, \T2, \T1, \T3, \T4, \T5, \T6    # apply GHASH on num_initial_blocks blocks
        i = (i+1)
        j = (j+1)
        setreg
.endr
        # XMM8 has the combined result here

        vmovdqa \XMM8, TMP1(%rsp)
        vmovdqa \XMM8, \T3

        # NOTE(review): signed compare kept from the original.  r13 looks
        # like a byte count set up by the caller; a length would normally
        # use an unsigned branch -- confirm before changing.
        cmp     $128, %r13
        jl      .L_initial_blocks_done\@                # no need for precomputed constants

###############################################################################
# Encrypt the next 8 blocks.  Each .irp below emits the same per-register
# instruction sequence, in the same order, as writing it out eight times.

        # Bump the counter, copy it into the block register, byte-swap the copy.
.irp blk, \XMM1, \XMM2, \XMM3, \XMM4, \XMM5, \XMM6, \XMM7, \XMM8
        vpaddd  ONE(%rip), \CTR, \CTR                   # INCR Y0
        vmovdqa \CTR, \blk
        vpshufb SHUF_MASK(%rip), \blk, \blk             # perform a 16Byte swap
.endr

        # Round 0.
        vmovdqa (arg1), \T_key
.irp blk, \XMM1, \XMM2, \XMM3, \XMM4, \XMM5, \XMM6, \XMM7, \XMM8
        vpxor   \T_key, \blk, \blk
.endr

        i = 1
        setreg
.rep    \REP       # do REP rounds
        vmovdqa 16*i(arg1), \T_key
.irp blk, \XMM1, \XMM2, \XMM3, \XMM4, \XMM5, \XMM6, \XMM7, \XMM8
        vaesenc \T_key, \blk, \blk
.endr
        i = (i+1)
        setreg
.endr

        # Final round.
        vmovdqa 16*i(arg1), \T_key
.irp blk, \XMM1, \XMM2, \XMM3, \XMM4, \XMM5, \XMM6, \XMM7, \XMM8
        vaesenclast \T_key, \blk, \blk
.endr

        # Xor with the input, store the output; when decrypting keep the
        # input block in the register so that it is what gets hashed later.
        vmovdqu (arg4, %r11), \T1
        vpxor   \T1, \XMM1, \XMM1
        vmovdqu \XMM1, (arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM1
        .endif

        vmovdqu 16*1(arg4, %r11), \T1
        vpxor   \T1, \XMM2, \XMM2
        vmovdqu \XMM2, 16*1(arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM2
        .endif

        vmovdqu 16*2(arg4, %r11), \T1
        vpxor   \T1, \XMM3, \XMM3
        vmovdqu \XMM3, 16*2(arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM3
        .endif

        vmovdqu 16*3(arg4, %r11), \T1
        vpxor   \T1, \XMM4, \XMM4
        vmovdqu \XMM4, 16*3(arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM4
        .endif

        vmovdqu 16*4(arg4, %r11), \T1
        vpxor   \T1, \XMM5, \XMM5
        vmovdqu \XMM5, 16*4(arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM5
        .endif

        vmovdqu 16*5(arg4, %r11), \T1
        vpxor   \T1, \XMM6, \XMM6
        vmovdqu \XMM6, 16*5(arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM6
        .endif

        vmovdqu 16*6(arg4, %r11), \T1
        vpxor   \T1, \XMM7, \XMM7
        vmovdqu \XMM7, 16*6(arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM7
        .endif

        vmovdqu 16*7(arg4, %r11), \T1
        vpxor   \T1, \XMM8, \XMM8
        vmovdqu \XMM8, 16*7(arg3 , %r11)
        .if   \ENC_DEC == DEC
        vmovdqa \T1, \XMM8
        .endif

        add     $128, %r11

        vpshufb SHUF_MASK(%rip), \XMM1, \XMM1           # perform a 16Byte swap
        vpxor   TMP1(%rsp), \XMM1, \XMM1                # combine GHASHed value with
                                                        # the corresponding ciphertext
.irp blk, \XMM2, \XMM3, \XMM4, \XMM5, \XMM6, \XMM7, \XMM8
        vpshufb SHUF_MASK(%rip), \blk, \blk             # perform a 16Byte swap
.endr

###############################################################################

.L_initial_blocks_done\@:


.endm



# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
# arg1, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC

        vmovdqa \XMM1, \T2
        vmovdqa \XMM2, TMP2(%rsp)
        vmovdqa \XMM3, TMP3(%rsp)
        vmovdqa \XMM4, TMP4(%rsp)
        vmovdqa \XMM5, TMP5(%rsp)
        vmovdqa \XMM6, TMP6(%rsp)
        vmovdqa \XMM7, TMP7(%rsp)
        vmovdqa \XMM8, TMP8(%rsp)

.if \loop_idx == in_order
                vpaddd  ONE(%rip), \CTR, \XMM1           # INCR CNT
                vpaddd  ONE(%rip), \XMM1, \XMM2
                vpaddd  ONE(%rip), \XMM2, \XMM3
                vpaddd  ONE(%rip), \XMM3, \XMM4
                vpaddd  ONE(%rip), \XMM4, \XMM5
                vpaddd  ONE(%rip), \XMM5, \XMM6
                vpaddd  ONE(%rip), \XMM6, \XMM7
22108c2ecf20Sopenharmony_ci vpaddd ONE(%rip), \XMM7, \XMM8 22118c2ecf20Sopenharmony_ci vmovdqa \XMM8, \CTR 22128c2ecf20Sopenharmony_ci 22138c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap 22148c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap 22158c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap 22168c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap 22178c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap 22188c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap 22198c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap 22208c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap 22218c2ecf20Sopenharmony_ci.else 22228c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT 22238c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \XMM1, \XMM2 22248c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \XMM2, \XMM3 22258c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \XMM3, \XMM4 22268c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \XMM4, \XMM5 22278c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \XMM5, \XMM6 22288c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \XMM6, \XMM7 22298c2ecf20Sopenharmony_ci vpaddd ONEf(%rip), \XMM7, \XMM8 22308c2ecf20Sopenharmony_ci vmovdqa \XMM8, \CTR 22318c2ecf20Sopenharmony_ci.endif 22328c2ecf20Sopenharmony_ci 22338c2ecf20Sopenharmony_ci 22348c2ecf20Sopenharmony_ci ####################################################################### 22358c2ecf20Sopenharmony_ci 22368c2ecf20Sopenharmony_ci vmovdqu (arg1), \T1 22378c2ecf20Sopenharmony_ci vpxor \T1, \XMM1, \XMM1 22388c2ecf20Sopenharmony_ci vpxor \T1, \XMM2, \XMM2 22398c2ecf20Sopenharmony_ci vpxor \T1, \XMM3, \XMM3 22408c2ecf20Sopenharmony_ci vpxor \T1, \XMM4, \XMM4 22418c2ecf20Sopenharmony_ci vpxor \T1, \XMM5, \XMM5 22428c2ecf20Sopenharmony_ci vpxor \T1, \XMM6, 
\XMM6 22438c2ecf20Sopenharmony_ci vpxor \T1, \XMM7, \XMM7 22448c2ecf20Sopenharmony_ci vpxor \T1, \XMM8, \XMM8 22458c2ecf20Sopenharmony_ci 22468c2ecf20Sopenharmony_ci ####################################################################### 22478c2ecf20Sopenharmony_ci 22488c2ecf20Sopenharmony_ci 22498c2ecf20Sopenharmony_ci 22508c2ecf20Sopenharmony_ci 22518c2ecf20Sopenharmony_ci 22528c2ecf20Sopenharmony_ci vmovdqu 16*1(arg1), \T1 22538c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 22548c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 22558c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 22568c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 22578c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 22588c2ecf20Sopenharmony_ci vaesenc \T1, \XMM6, \XMM6 22598c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 22608c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 22618c2ecf20Sopenharmony_ci 22628c2ecf20Sopenharmony_ci vmovdqu 16*2(arg1), \T1 22638c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 22648c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 22658c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 22668c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 22678c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 22688c2ecf20Sopenharmony_ci vaesenc \T1, \XMM6, \XMM6 22698c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 22708c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 22718c2ecf20Sopenharmony_ci 22728c2ecf20Sopenharmony_ci 22738c2ecf20Sopenharmony_ci ####################################################################### 22748c2ecf20Sopenharmony_ci 22758c2ecf20Sopenharmony_ci vmovdqu HashKey_8(arg2), \T5 22768c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 22778c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 22788c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 22798c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1 22808c2ecf20Sopenharmony_ci vpxor \T5, \T6, \T6 22818c2ecf20Sopenharmony_ci 
22828c2ecf20Sopenharmony_ci vmovdqu 16*3(arg1), \T1 22838c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 22848c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 22858c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 22868c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 22878c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 22888c2ecf20Sopenharmony_ci vaesenc \T1, \XMM6, \XMM6 22898c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 22908c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 22918c2ecf20Sopenharmony_ci 22928c2ecf20Sopenharmony_ci vmovdqa TMP2(%rsp), \T1 22938c2ecf20Sopenharmony_ci vmovdqu HashKey_7(arg2), \T5 22948c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T1, \T3 22958c2ecf20Sopenharmony_ci vpxor \T3, \T4, \T4 22968c2ecf20Sopenharmony_ci 22978c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T1, \T3 22988c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 22998c2ecf20Sopenharmony_ci 23008c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T1, \T3 23018c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23028c2ecf20Sopenharmony_ci 23038c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T1, \T3 23048c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23058c2ecf20Sopenharmony_ci 23068c2ecf20Sopenharmony_ci vmovdqu 16*4(arg1), \T1 23078c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 23088c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 23098c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 23108c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 23118c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 23128c2ecf20Sopenharmony_ci vaesenc \T1, \XMM6, \XMM6 23138c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 23148c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 23158c2ecf20Sopenharmony_ci 23168c2ecf20Sopenharmony_ci ####################################################################### 23178c2ecf20Sopenharmony_ci 23188c2ecf20Sopenharmony_ci vmovdqa TMP3(%rsp), \T1 23198c2ecf20Sopenharmony_ci vmovdqu HashKey_6(arg2), \T5 23208c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T1, \T3 
23218c2ecf20Sopenharmony_ci vpxor \T3, \T4, \T4 23228c2ecf20Sopenharmony_ci 23238c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T1, \T3 23248c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 23258c2ecf20Sopenharmony_ci 23268c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T1, \T3 23278c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23288c2ecf20Sopenharmony_ci 23298c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T1, \T3 23308c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23318c2ecf20Sopenharmony_ci 23328c2ecf20Sopenharmony_ci vmovdqu 16*5(arg1), \T1 23338c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 23348c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 23358c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 23368c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 23378c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 23388c2ecf20Sopenharmony_ci vaesenc \T1, \XMM6, \XMM6 23398c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 23408c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 23418c2ecf20Sopenharmony_ci 23428c2ecf20Sopenharmony_ci vmovdqa TMP4(%rsp), \T1 23438c2ecf20Sopenharmony_ci vmovdqu HashKey_5(arg2), \T5 23448c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T1, \T3 23458c2ecf20Sopenharmony_ci vpxor \T3, \T4, \T4 23468c2ecf20Sopenharmony_ci 23478c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T1, \T3 23488c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 23498c2ecf20Sopenharmony_ci 23508c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T1, \T3 23518c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23528c2ecf20Sopenharmony_ci 23538c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T1, \T3 23548c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23558c2ecf20Sopenharmony_ci 23568c2ecf20Sopenharmony_ci vmovdqu 16*6(arg1), \T1 23578c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 23588c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 23598c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 23608c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 23618c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 23628c2ecf20Sopenharmony_ci 
vaesenc \T1, \XMM6, \XMM6 23638c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 23648c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 23658c2ecf20Sopenharmony_ci 23668c2ecf20Sopenharmony_ci 23678c2ecf20Sopenharmony_ci vmovdqa TMP5(%rsp), \T1 23688c2ecf20Sopenharmony_ci vmovdqu HashKey_4(arg2), \T5 23698c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T1, \T3 23708c2ecf20Sopenharmony_ci vpxor \T3, \T4, \T4 23718c2ecf20Sopenharmony_ci 23728c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T1, \T3 23738c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 23748c2ecf20Sopenharmony_ci 23758c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T1, \T3 23768c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23778c2ecf20Sopenharmony_ci 23788c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T1, \T3 23798c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 23808c2ecf20Sopenharmony_ci 23818c2ecf20Sopenharmony_ci vmovdqu 16*7(arg1), \T1 23828c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 23838c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 23848c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 23858c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 23868c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 23878c2ecf20Sopenharmony_ci vaesenc \T1, \XMM6, \XMM6 23888c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 23898c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 23908c2ecf20Sopenharmony_ci 23918c2ecf20Sopenharmony_ci vmovdqa TMP6(%rsp), \T1 23928c2ecf20Sopenharmony_ci vmovdqu HashKey_3(arg2), \T5 23938c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T1, \T3 23948c2ecf20Sopenharmony_ci vpxor \T3, \T4, \T4 23958c2ecf20Sopenharmony_ci 23968c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T1, \T3 23978c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 23988c2ecf20Sopenharmony_ci 23998c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T1, \T3 24008c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 24018c2ecf20Sopenharmony_ci 24028c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T1, \T3 24038c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 
24048c2ecf20Sopenharmony_ci 24058c2ecf20Sopenharmony_ci vmovdqu 16*8(arg1), \T1 24068c2ecf20Sopenharmony_ci vaesenc \T1, \XMM1, \XMM1 24078c2ecf20Sopenharmony_ci vaesenc \T1, \XMM2, \XMM2 24088c2ecf20Sopenharmony_ci vaesenc \T1, \XMM3, \XMM3 24098c2ecf20Sopenharmony_ci vaesenc \T1, \XMM4, \XMM4 24108c2ecf20Sopenharmony_ci vaesenc \T1, \XMM5, \XMM5 24118c2ecf20Sopenharmony_ci vaesenc \T1, \XMM6, \XMM6 24128c2ecf20Sopenharmony_ci vaesenc \T1, \XMM7, \XMM7 24138c2ecf20Sopenharmony_ci vaesenc \T1, \XMM8, \XMM8 24148c2ecf20Sopenharmony_ci 24158c2ecf20Sopenharmony_ci vmovdqa TMP7(%rsp), \T1 24168c2ecf20Sopenharmony_ci vmovdqu HashKey_2(arg2), \T5 24178c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T1, \T3 24188c2ecf20Sopenharmony_ci vpxor \T3, \T4, \T4 24198c2ecf20Sopenharmony_ci 24208c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T1, \T3 24218c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 24228c2ecf20Sopenharmony_ci 24238c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T1, \T3 24248c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 24258c2ecf20Sopenharmony_ci 24268c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T1, \T3 24278c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 24288c2ecf20Sopenharmony_ci 24298c2ecf20Sopenharmony_ci 24308c2ecf20Sopenharmony_ci ####################################################################### 24318c2ecf20Sopenharmony_ci 24328c2ecf20Sopenharmony_ci vmovdqu 16*9(arg1), \T5 24338c2ecf20Sopenharmony_ci vaesenc \T5, \XMM1, \XMM1 24348c2ecf20Sopenharmony_ci vaesenc \T5, \XMM2, \XMM2 24358c2ecf20Sopenharmony_ci vaesenc \T5, \XMM3, \XMM3 24368c2ecf20Sopenharmony_ci vaesenc \T5, \XMM4, \XMM4 24378c2ecf20Sopenharmony_ci vaesenc \T5, \XMM5, \XMM5 24388c2ecf20Sopenharmony_ci vaesenc \T5, \XMM6, \XMM6 24398c2ecf20Sopenharmony_ci vaesenc \T5, \XMM7, \XMM7 24408c2ecf20Sopenharmony_ci vaesenc \T5, \XMM8, \XMM8 24418c2ecf20Sopenharmony_ci 24428c2ecf20Sopenharmony_ci vmovdqa TMP8(%rsp), \T1 24438c2ecf20Sopenharmony_ci vmovdqu HashKey(arg2), \T5 24448c2ecf20Sopenharmony_ci 
24458c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T5, \T1, \T3 24468c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 24478c2ecf20Sopenharmony_ci 24488c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T5, \T1, \T3 24498c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 24508c2ecf20Sopenharmony_ci 24518c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T5, \T1, \T3 24528c2ecf20Sopenharmony_ci vpxor \T3, \T6, \T6 24538c2ecf20Sopenharmony_ci 24548c2ecf20Sopenharmony_ci vpclmulqdq $0x11, \T5, \T1, \T3 24558c2ecf20Sopenharmony_ci vpxor \T3, \T4, \T1 24568c2ecf20Sopenharmony_ci 24578c2ecf20Sopenharmony_ci 24588c2ecf20Sopenharmony_ci vmovdqu 16*10(arg1), \T5 24598c2ecf20Sopenharmony_ci 24608c2ecf20Sopenharmony_ci i = 11 24618c2ecf20Sopenharmony_ci setreg 24628c2ecf20Sopenharmony_ci.rep (\REP-9) 24638c2ecf20Sopenharmony_ci vaesenc \T5, \XMM1, \XMM1 24648c2ecf20Sopenharmony_ci vaesenc \T5, \XMM2, \XMM2 24658c2ecf20Sopenharmony_ci vaesenc \T5, \XMM3, \XMM3 24668c2ecf20Sopenharmony_ci vaesenc \T5, \XMM4, \XMM4 24678c2ecf20Sopenharmony_ci vaesenc \T5, \XMM5, \XMM5 24688c2ecf20Sopenharmony_ci vaesenc \T5, \XMM6, \XMM6 24698c2ecf20Sopenharmony_ci vaesenc \T5, \XMM7, \XMM7 24708c2ecf20Sopenharmony_ci vaesenc \T5, \XMM8, \XMM8 24718c2ecf20Sopenharmony_ci 24728c2ecf20Sopenharmony_ci vmovdqu 16*i(arg1), \T5 24738c2ecf20Sopenharmony_ci i = i + 1 24748c2ecf20Sopenharmony_ci setreg 24758c2ecf20Sopenharmony_ci.endr 24768c2ecf20Sopenharmony_ci 24778c2ecf20Sopenharmony_ci i = 0 24788c2ecf20Sopenharmony_ci j = 1 24798c2ecf20Sopenharmony_ci setreg 24808c2ecf20Sopenharmony_ci.rep 8 24818c2ecf20Sopenharmony_ci vpxor 16*i(arg4, %r11), \T5, \T2 24828c2ecf20Sopenharmony_ci .if \ENC_DEC == ENC 24838c2ecf20Sopenharmony_ci vaesenclast \T2, reg_j, reg_j 24848c2ecf20Sopenharmony_ci .else 24858c2ecf20Sopenharmony_ci vaesenclast \T2, reg_j, \T3 24868c2ecf20Sopenharmony_ci vmovdqu 16*i(arg4, %r11), reg_j 24878c2ecf20Sopenharmony_ci vmovdqu \T3, 16*i(arg3, %r11) 24888c2ecf20Sopenharmony_ci .endif 24898c2ecf20Sopenharmony_ci i = (i+1) 
24908c2ecf20Sopenharmony_ci j = (j+1) 24918c2ecf20Sopenharmony_ci setreg 24928c2ecf20Sopenharmony_ci.endr 24938c2ecf20Sopenharmony_ci ####################################################################### 24948c2ecf20Sopenharmony_ci 24958c2ecf20Sopenharmony_ci 24968c2ecf20Sopenharmony_ci vpslldq $8, \T6, \T3 # shift-L T3 2 DWs 24978c2ecf20Sopenharmony_ci vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs 24988c2ecf20Sopenharmony_ci vpxor \T3, \T7, \T7 24998c2ecf20Sopenharmony_ci vpxor \T6, \T1, \T1 # accumulate the results in T1:T7 25008c2ecf20Sopenharmony_ci 25018c2ecf20Sopenharmony_ci 25028c2ecf20Sopenharmony_ci 25038c2ecf20Sopenharmony_ci ####################################################################### 25048c2ecf20Sopenharmony_ci #first phase of the reduction 25058c2ecf20Sopenharmony_ci vmovdqa POLY2(%rip), \T3 25068c2ecf20Sopenharmony_ci 25078c2ecf20Sopenharmony_ci vpclmulqdq $0x01, \T7, \T3, \T2 25088c2ecf20Sopenharmony_ci vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs 25098c2ecf20Sopenharmony_ci 25108c2ecf20Sopenharmony_ci vpxor \T2, \T7, \T7 # first phase of the reduction complete 25118c2ecf20Sopenharmony_ci ####################################################################### 25128c2ecf20Sopenharmony_ci .if \ENC_DEC == ENC 25138c2ecf20Sopenharmony_ci vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer 25148c2ecf20Sopenharmony_ci vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer 25158c2ecf20Sopenharmony_ci vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer 25168c2ecf20Sopenharmony_ci vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer 25178c2ecf20Sopenharmony_ci vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer 25188c2ecf20Sopenharmony_ci vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer 25198c2ecf20Sopenharmony_ci vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer 25208c2ecf20Sopenharmony_ci vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer 
25218c2ecf20Sopenharmony_ci .endif 25228c2ecf20Sopenharmony_ci 25238c2ecf20Sopenharmony_ci ####################################################################### 25248c2ecf20Sopenharmony_ci #second phase of the reduction 25258c2ecf20Sopenharmony_ci vpclmulqdq $0x00, \T7, \T3, \T2 25268c2ecf20Sopenharmony_ci vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) 25278c2ecf20Sopenharmony_ci 25288c2ecf20Sopenharmony_ci vpclmulqdq $0x10, \T7, \T3, \T4 25298c2ecf20Sopenharmony_ci vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts) 25308c2ecf20Sopenharmony_ci 25318c2ecf20Sopenharmony_ci vpxor \T2, \T4, \T4 # second phase of the reduction complete 25328c2ecf20Sopenharmony_ci ####################################################################### 25338c2ecf20Sopenharmony_ci vpxor \T4, \T1, \T1 # the result is in T1 25348c2ecf20Sopenharmony_ci 25358c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap 25368c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap 25378c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap 25388c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap 25398c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap 25408c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap 25418c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap 25428c2ecf20Sopenharmony_ci vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap 25438c2ecf20Sopenharmony_ci 25448c2ecf20Sopenharmony_ci 25458c2ecf20Sopenharmony_ci vpxor \T1, \XMM1, \XMM1 25468c2ecf20Sopenharmony_ci 25478c2ecf20Sopenharmony_ci 25488c2ecf20Sopenharmony_ci 25498c2ecf20Sopenharmony_ci.endm 25508c2ecf20Sopenharmony_ci 25518c2ecf20Sopenharmony_ci 25528c2ecf20Sopenharmony_ci# GHASH the last 4 ciphertext blocks. 
# GHASH_LAST_8_AVX2: fold eight 128-bit blocks into a single reduced value.
#
# All EIGHT blocks \XMM1..\XMM8 are consumed. Block i is carry-less
# multiplied by its own key power, loaded from HashKey_8(arg2) for \XMM1
# down to HashKey(arg2) for \XMM8, and the eight products are XORed together
# before a single reduction using the POLY2 constant.
#
# Inputs:  \XMM1..\XMM8  blocks to hash (presumably already byte-swapped, with
#                        the running hash XORed into \XMM1 by the caller --
#                        TODO confirm against the call site)
#          arg2          base address used for the HashKey_* loads
# Output:  \T6           reduced 128-bit result
# Scratch: \T2 \T3 \T4 \T5 \T7 and \XMM1 are overwritten.
#          \T1 is accepted for signature compatibility but never referenced.
#
# Method (Karatsuba, three multiplies per block). With a = block and
# b = key power, each split into 64-bit halves hi:lo:
#          \T6   accumulates a.hi * b.hi                     (imm 0x11)
#          \T7   accumulates a.lo * b.lo                     (imm 0x00)
#          \XMM1 accumulates (a.hi ^ a.lo) * (b.hi ^ b.lo)
# vpshufd $0b01001110 swaps the two 64-bit halves of a register, so XORing
# the swapped copy with the original leaves hi ^ lo in both halves.
.macro  GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8

        # ---- block 1: \XMM1 * HashKey_8 (initialises the accumulators) ----
        vmovdqu         HashKey_8(arg2), \T5

        vpshufd         $0b01001110, \XMM1, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM1, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM1, \T6
        vpclmulqdq      $0x00, \T5, \XMM1, \T7

        vpclmulqdq      $0x00, \T3, \T2, \XMM1          # \XMM1 is reused as the middle-term accumulator

        # ---- block 2: \XMM2 * HashKey_7 ----
        vmovdqu         HashKey_7(arg2), \T5
        vpshufd         $0b01001110, \XMM2, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM2, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM2, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM2, \T4
        vpxor           \T4, \T7, \T7

        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor           \T2, \XMM1, \XMM1

        # ---- block 3: \XMM3 * HashKey_6 ----
        vmovdqu         HashKey_6(arg2), \T5
        vpshufd         $0b01001110, \XMM3, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM3, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM3, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM3, \T4
        vpxor           \T4, \T7, \T7

        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor           \T2, \XMM1, \XMM1

        # ---- block 4: \XMM4 * HashKey_5 ----
        vmovdqu         HashKey_5(arg2), \T5
        vpshufd         $0b01001110, \XMM4, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM4, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM4, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM4, \T4
        vpxor           \T4, \T7, \T7

        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor           \T2, \XMM1, \XMM1

        # ---- block 5: \XMM5 * HashKey_4 ----
        vmovdqu         HashKey_4(arg2), \T5
        vpshufd         $0b01001110, \XMM5, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM5, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM5, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM5, \T4
        vpxor           \T4, \T7, \T7

        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor           \T2, \XMM1, \XMM1

        # ---- block 6: \XMM6 * HashKey_3 ----
        vmovdqu         HashKey_3(arg2), \T5
        vpshufd         $0b01001110, \XMM6, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM6, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM6, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM6, \T4
        vpxor           \T4, \T7, \T7

        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor           \T2, \XMM1, \XMM1

        # ---- block 7: \XMM7 * HashKey_2 ----
        vmovdqu         HashKey_2(arg2), \T5
        vpshufd         $0b01001110, \XMM7, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM7, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM7, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM7, \T4
        vpxor           \T4, \T7, \T7

        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor           \T2, \XMM1, \XMM1

        # ---- block 8: \XMM8 * HashKey ----
        vmovdqu         HashKey(arg2), \T5
        vpshufd         $0b01001110, \XMM8, \T2
        vpshufd         $0b01001110, \T5, \T3
        vpxor           \XMM8, \T2, \T2
        vpxor           \T5, \T3, \T3

        vpclmulqdq      $0x11, \T5, \XMM8, \T4
        vpxor           \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM8, \T4
        vpxor           \T4, \T7, \T7

        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor           \T2, \XMM1, \XMM1

        # ---- Karatsuba recombination ----
        # True middle term = accumulated (hi^lo) products ^ high ^ low.
        vpxor           \T6, \XMM1, \XMM1
        vpxor           \T7, \XMM1, \T2

        # Split the middle term across the 256-bit product: its low 8 bytes
        # go into the upper half of \T7, its high 8 bytes into the lower
        # half of \T6.
        vpslldq         $8, \T2, \T4
        vpsrldq         $8, \T2, \T2

        vpxor           \T4, \T7, \T7
        vpxor           \T2, \T6, \T6                   # <T6:T7> holds the result of the
                                                        # accumulated carry-less multiplications

        #######################################################################
        # first phase of the reduction
        vmovdqa         POLY2(%rip), \T3

        vpclmulqdq      $0x01, \T7, \T3, \T2
        vpslldq         $8, \T2, \T2                    # shift T2 left by 2 DWs (8 bytes)

        vpxor           \T2, \T7, \T7                   # first phase of the reduction complete
        #######################################################################

        # second phase of the reduction
        vpclmulqdq      $0x00, \T7, \T3, \T2
        vpsrldq         $4, \T2, \T2                    # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

        vpclmulqdq      $0x10, \T7, \T3, \T4
        vpslldq         $4, \T4, \T4                    # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)

        vpxor           \T2, \T4, \T4                   # second phase of the reduction complete
        #######################################################################
        vpxor           \T4, \T6, \T6                   # the result is in T6
.endm



#############################################################
#void   aesni_gcm_init_avx_gen4
#        (gcm_data     *my_ctx_data,
#         gcm_context_data *data,
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#                       (from Security Association) concatenated with 8 byte
#                       Initialisation Vector (from IPSec ESP Payload)
#                       concatenated with 0x00000001. 16-byte aligned pointer. */
#        u8     *hash_subkey, /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
#        u64     aad_len) /* Length of AAD in bytes.
#                       With RFC4106 this is going to be 8 or 12 Bytes */
#
# Entry point for the AVX2 ("gen4") GCM initialisation. The body is the
# shared INIT macro instantiated with the AVX2 GHASH multiply
# (GHASH_MUL_AVX2) and the AVX2 hash-key precompute (PRECOMPUTE_AVX2),
# bracketed by FUNC_SAVE / FUNC_RESTORE.
#############################################################
SYM_FUNC_START(aesni_gcm_init_avx_gen4)
        FUNC_SAVE
        INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_init_avx_gen4)

###############################################################################
#void   aesni_gcm_enc_update_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
#        const   u8 *in, /* Plaintext input */
#        u64     plaintext_len) /* Length of data in Bytes for encryption.
#                       */
#
# AVX2 ("gen4") GCM encrypt-update entry point.
#
# Dispatches on `keysize`: 32 selects the key_256 path, 16 the key_128 path,
# and any other value falls through to the 192-bit path. Each path expands
# GCM_ENC_DEC with the same AVX2 helper macros and ENC; only the last
# argument differs (9 / 11 / 13 for 128 / 192 / 256-bit keys -- presumably
# the number of full AES rounds before the final one; confirm against
# GCM_ENC_DEC).
# Every path ends with its own FUNC_RESTORE + RET.
# Clobbers %eax and flags for the dispatch, in addition to whatever the
# expanded macros use.
###############################################################################
SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4)
        FUNC_SAVE
        mov     keysize,%eax
        cmp     $32, %eax
        je      key_256_enc_update4
        cmp     $16, %eax
        je      key_128_enc_update4
        # must be 192
        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
        FUNC_RESTORE
        RET
key_128_enc_update4:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
        FUNC_RESTORE
        RET
key_256_enc_update4:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)

###############################################################################
#void   aesni_gcm_dec_update_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
#        const   u8 *in, /* Ciphertext input */
#        u64     plaintext_len) /* Length of data in Bytes for decryption.
#                       */
#
# AVX2 ("gen4") GCM decrypt-update entry point.
#
# Dispatches on `keysize`: 32 selects the key_256 path, 16 the key_128 path,
# and any other value falls through to the 192-bit path. Each path expands
# GCM_ENC_DEC with the same AVX2 helper macros and DEC; only the last
# argument differs (9 / 11 / 13 for 128 / 192 / 256-bit keys -- presumably
# the number of full AES rounds before the final one; confirm against
# GCM_ENC_DEC).
# Every path ends with its own FUNC_RESTORE + RET.
# Clobbers %eax and flags for the dispatch, in addition to whatever the
# expanded macros use.
###############################################################################
SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4)
        FUNC_SAVE
        mov     keysize,%eax
        cmp     $32, %eax
        je      key_256_dec_update4
        cmp     $16, %eax
        je      key_128_dec_update4
        # must be 192
        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
        FUNC_RESTORE
        RET
key_128_dec_update4:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
        FUNC_RESTORE
        RET
key_256_dec_update4:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)

###############################################################################
#void   aesni_gcm_finalize_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) /* Authenticated Tag Length in bytes.
#                               Valid values are 16 (most likely), 12 or 8.
#                       */
#
# AVX2 ("gen4") GCM finalisation entry point.
#
# Dispatches on `keysize`: 32 selects the key_256 path, 16 the key_128 path,
# and any other value falls through to the 192-bit path. Each path expands
# GCM_COMPLETE with GHASH_MUL_AVX2, a per-key-size count (9 / 11 / 13 for
# 128 / 192 / 256-bit keys -- presumably the number of full AES rounds
# before the final one; confirm against GCM_COMPLETE), and arg3 / arg4,
# which the signature above lists as the tag buffer and tag length.
# Every path ends with its own FUNC_RESTORE + RET.
# Clobbers %eax and flags for the dispatch, in addition to whatever the
# expanded macros use.
###############################################################################
SYM_FUNC_START(aesni_gcm_finalize_avx_gen4)
        FUNC_SAVE
        mov     keysize,%eax
        cmp     $32, %eax
        je      key_256_finalize4
        cmp     $16, %eax
        je      key_128_finalize4
        # must be 192
        GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
        FUNC_RESTORE
        RET
key_128_finalize4:
        GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
        FUNC_RESTORE
        RET
key_256_finalize4:
        GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)