########################################################################
# Copyright (c) 2013, Intel Corporation
#
# This software is available to you under a choice of one of two
# licenses.  You may choose to be licensed under the terms of the GNU
# General Public License (GPL) Version 2, available from the file
# COPYING in the main directory of this source tree, or the
# OpenIB.org BSD license below:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the
#   distribution.
#
# * Neither the name of the Intel Corporation nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
##
## Authors:
##	Erdinc Ozturk <erdinc.ozturk@intel.com>
##	Vinodh Gopal <vinodh.gopal@intel.com>
##	James Guilford <james.guilford@intel.com>
##	Tim Chen <tim.c.chen@linux.intel.com>
##
## References:
##       This code was derived and highly optimized from the code described in paper:
##               Vinodh Gopal et al. Optimized Galois-Counter-Mode Implementation
##               on Intel Architecture Processors. August, 2010
##       The details of the implementation are explained in:
##               Erdinc Ozturk et al. Enabling High-Performance Galois-Counter-Mode
##               on Intel Architecture Processors. October, 2012.
##
## Assumptions:
##
##
##
## iv:
##       0                   1                   2                   3
##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                      Salt (From the SA)                       |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                     Initialization Vector                     |
##      |        (This is the sequence number from IPSec header)        |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                              0x1                              |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##
##
##
## AAD:
##       AAD padded to 128 bits with 0
##       for example, assume AAD is a u32 vector
##
##       if AAD is 8 bytes:
##       AAD[3] = {A0, A1};
##       padded AAD in xmm register = {A1 A0 0 0}
##
##       0                   1                   2                   3
##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                           SPI (A1)                            |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                  32-bit Sequence Number (A0)                  |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                              0x0                              |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##
##                      AAD Format with 32-bit Sequence Number
##
##       if AAD is 12 bytes:
##       AAD[3] = {A0, A1, A2};
##       padded AAD in xmm register = {A2 A1 A0 0}
##
##       0                   1                   2                   3
##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                           SPI (A2)                            |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |            64-bit Extended Sequence Number {A1,A0}            |
##      |                                                               |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##      |                              0x0                              |
##      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##
##                  AAD Format with 64-bit Extended Sequence Number
##
##
## aadLen:
##       from the definition of the spec, aadLen can only be 8 or 12 bytes.
##       The code additionally supports aadLen of length 16 bytes.
##
## TLen:
##       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
##
## poly = x^128 + x^127 + x^126 + x^121 + 1
## throughout the code, one tab and two tab indentations are used.
## one tab is for GHASH part, two tabs is for AES part.
##

#include <linux/linkage.h>

# constants in mergeable sections, linker can reorder and merge
# POLY has bits 127, 126, 121 and 0 set, matching the x^127 + x^126 + x^121 + 1
# terms of the polynomial quoted in the header comment.
.section	.rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY:            .octa     0xC2000000000000000000000000000001

.section	.rodata.cst16.POLY2, "aM", @progbits, 16
.align 16
POLY2:           .octa     0xC20000000000000000000001C2000000

.section	.rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE:          .octa     0x00000001000000000000000000000001

# Used as a vpshufb control: byte i of the result is taken from byte 15-i of
# the source, so it reverses the order of the 16 bytes.
.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:       .octa     0x000102030405060708090A0B0C0D0E0F

# Value 1 in the lowest dword; added with vpaddd to step the counter block.
.section	.rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE:             .octa     0x00000000000000000000000000000001

# Value 1 in the highest byte.
.section	.rodata.cst16.ONEf, "aM", @progbits, 16
.align 16
ONEf:            .octa     0x01000000000000000000000000000000

# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F
#
# The three 16-byte values form one 48-byte table that is read at unaligned
# offsets. With n = number of valid bytes (1..15):
#   SHIFT_MASK+16-n  gives n identity indices followed by 0xff bytes, i.e. a
#                    vpshufb control that moves the top n bytes to the bottom
#                    and zeroes the rest;
#   ALL_F+16-n       gives n bytes of 0xff followed by zeroes, i.e. an AND mask
#                    that keeps the low n bytes.
# This is why the table lives in plain .rodata and not in a mergeable section.
.section .rodata, "a", @progbits
.align 16
SHIFT_MASK:      .octa     0x0f0e0d0c0b0a09080706050403020100
ALL_F:           .octa     0xffffffffffffffffffffffffffffffff
                 .octa     0x00000000000000000000000000000000

.text


# Byte offsets of the per-request state inside the context addressed by arg2.
# InLen shares the 16-byte slot of AadLen (second quadword).
#define AadHash 16*0
#define AadLen 16*1
#define InLen (16*1)+8
#define PBlockEncKey 16*2
#define OrigIV 16*3
#define CurCount 16*4
#define PBlockLen 16*5

# Byte offsets of the hash key powers, also relative to arg2.
HashKey        = 16*6   # store HashKey <<1 mod poly here
HashKey_2      = 16*7   # store HashKey^2 <<1 mod poly here
HashKey_3      = 16*8   # store HashKey^3 <<1 mod poly here
HashKey_4      = 16*9   # store HashKey^4 <<1 mod poly here
HashKey_5      = 16*10   # store HashKey^5 <<1 mod poly here
HashKey_6      = 16*11   # store HashKey^6 <<1 mod poly here
HashKey_7      = 16*12   # store HashKey^7 <<1 mod poly here
HashKey_8      = 16*13   # store HashKey^8 <<1 mod poly here
HashKey_k      = 16*14   # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
HashKey_2_k    = 16*15   # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
HashKey_3_k    = 16*16   # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
HashKey_4_k    = 16*17   # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
HashKey_5_k    = 16*18   # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
HashKey_6_k    = 16*19   # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
HashKey_7_k    = 16*20   # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
HashKey_8_k    = 16*21   # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)

# Integer argument registers in System V AMD64 calling-convention order.
# keysize is the memory operand at byte offset 2*15*16 from arg1
# (NOTE(review): presumably the key-length field of the AES key schedule
# structure - confirm against the C-side definition).
#define arg1 %rdi
#define arg2 %rsi
#define arg3 %rdx
#define arg4 %rcx
#define arg5 %r8
#define arg6 %r9
#define keysize 2*15*16(arg1)

# Assembly-time counters consumed by setreg.
i = 0
j = 0

# Symbolic values for the ordering and direction parameters of the macros.
out_order = 0
in_order = 1
DEC = 0
ENC = 1

# define_reg r n: bind the assembler symbol reg_<r> to register %xmm<n>.
.macro define_reg r n
reg_\r = %xmm\n
.endm

# setreg: rebind reg_i and reg_j to the xmm registers numbered by the current
# values of i and j. In .altmacro mode the % prefix makes the assembler pass
# the evaluated number, not the symbol name, to define_reg.
.macro setreg
.altmacro
define_reg i %i
define_reg j %j
.noaltmacro
.endm

# Byte offsets of 16-byte scratch slots (NOTE(review): presumably relative to
# the aligned rsp set up by FUNC_SAVE - confirm at the points of use).
TMP1 =   16*0    # Temporary storage for AAD
TMP2 =   16*1    # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
TMP3 =   16*2    # Temporary storage for AES State 3
TMP4 =   16*3    # Temporary storage for AES State 4
TMP5 =   16*4    # Temporary storage for AES State 5
TMP6 =   16*5    # Temporary storage for AES State 6
21862306a36Sopenharmony_ciTMP7 = 16*6 # Temporary storage for AES State 7 21962306a36Sopenharmony_ciTMP8 = 16*7 # Temporary storage for AES State 8 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ciVARIABLE_OFFSET = 16*8 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci################################ 22462306a36Sopenharmony_ci# Utility Macros 22562306a36Sopenharmony_ci################################ 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci.macro FUNC_SAVE 22862306a36Sopenharmony_ci push %r12 22962306a36Sopenharmony_ci push %r13 23062306a36Sopenharmony_ci push %r15 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci push %rbp 23362306a36Sopenharmony_ci mov %rsp, %rbp 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci sub $VARIABLE_OFFSET, %rsp 23662306a36Sopenharmony_ci and $~63, %rsp # align rsp to 64 bytes 23762306a36Sopenharmony_ci.endm 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci.macro FUNC_RESTORE 24062306a36Sopenharmony_ci mov %rbp, %rsp 24162306a36Sopenharmony_ci pop %rbp 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci pop %r15 24462306a36Sopenharmony_ci pop %r13 24562306a36Sopenharmony_ci pop %r12 24662306a36Sopenharmony_ci.endm 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci# Encryption of a single block 24962306a36Sopenharmony_ci.macro ENCRYPT_SINGLE_BLOCK REP XMM0 25062306a36Sopenharmony_ci vpxor (arg1), \XMM0, \XMM0 25162306a36Sopenharmony_ci i = 1 25262306a36Sopenharmony_ci setreg 25362306a36Sopenharmony_ci.rep \REP 25462306a36Sopenharmony_ci vaesenc 16*i(arg1), \XMM0, \XMM0 25562306a36Sopenharmony_ci i = (i+1) 25662306a36Sopenharmony_ci setreg 25762306a36Sopenharmony_ci.endr 25862306a36Sopenharmony_ci vaesenclast 16*i(arg1), \XMM0, \XMM0 25962306a36Sopenharmony_ci.endm 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci# combined for GCM encrypt and decrypt functions 26262306a36Sopenharmony_ci# clobbering all xmm registers 26362306a36Sopenharmony_ci# clobbering r10, r11, r12, r13, r15, rax 
26462306a36Sopenharmony_ci.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP 26562306a36Sopenharmony_ci vmovdqu AadHash(arg2), %xmm8 26662306a36Sopenharmony_ci vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey 26762306a36Sopenharmony_ci add arg5, InLen(arg2) 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci # initialize the data pointer offset as zero 27062306a36Sopenharmony_ci xor %r11d, %r11d 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC 27362306a36Sopenharmony_ci sub %r11, arg5 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci mov arg5, %r13 # save the number of bytes of plaintext/ciphertext 27662306a36Sopenharmony_ci and $-16, %r13 # r13 = r13 - (r13 mod 16) 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci mov %r13, %r12 27962306a36Sopenharmony_ci shr $4, %r12 28062306a36Sopenharmony_ci and $7, %r12 28162306a36Sopenharmony_ci jz .L_initial_num_blocks_is_0\@ 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci cmp $7, %r12 28462306a36Sopenharmony_ci je .L_initial_num_blocks_is_7\@ 28562306a36Sopenharmony_ci cmp $6, %r12 28662306a36Sopenharmony_ci je .L_initial_num_blocks_is_6\@ 28762306a36Sopenharmony_ci cmp $5, %r12 28862306a36Sopenharmony_ci je .L_initial_num_blocks_is_5\@ 28962306a36Sopenharmony_ci cmp $4, %r12 29062306a36Sopenharmony_ci je .L_initial_num_blocks_is_4\@ 29162306a36Sopenharmony_ci cmp $3, %r12 29262306a36Sopenharmony_ci je .L_initial_num_blocks_is_3\@ 29362306a36Sopenharmony_ci cmp $2, %r12 29462306a36Sopenharmony_ci je .L_initial_num_blocks_is_2\@ 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci jmp .L_initial_num_blocks_is_1\@ 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci.L_initial_num_blocks_is_7\@: 29962306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 
30062306a36Sopenharmony_ci sub $16*7, %r13 30162306a36Sopenharmony_ci jmp .L_initial_blocks_encrypted\@ 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci.L_initial_num_blocks_is_6\@: 30462306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 30562306a36Sopenharmony_ci sub $16*6, %r13 30662306a36Sopenharmony_ci jmp .L_initial_blocks_encrypted\@ 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci.L_initial_num_blocks_is_5\@: 30962306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 31062306a36Sopenharmony_ci sub $16*5, %r13 31162306a36Sopenharmony_ci jmp .L_initial_blocks_encrypted\@ 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci.L_initial_num_blocks_is_4\@: 31462306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 31562306a36Sopenharmony_ci sub $16*4, %r13 31662306a36Sopenharmony_ci jmp .L_initial_blocks_encrypted\@ 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci.L_initial_num_blocks_is_3\@: 31962306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 32062306a36Sopenharmony_ci sub $16*3, %r13 32162306a36Sopenharmony_ci jmp .L_initial_blocks_encrypted\@ 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci.L_initial_num_blocks_is_2\@: 32462306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 32562306a36Sopenharmony_ci sub $16*2, %r13 32662306a36Sopenharmony_ci jmp .L_initial_blocks_encrypted\@ 32762306a36Sopenharmony_ci 
32862306a36Sopenharmony_ci.L_initial_num_blocks_is_1\@: 32962306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 33062306a36Sopenharmony_ci sub $16*1, %r13 33162306a36Sopenharmony_ci jmp .L_initial_blocks_encrypted\@ 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci.L_initial_num_blocks_is_0\@: 33462306a36Sopenharmony_ci \INITIAL_BLOCKS \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci.L_initial_blocks_encrypted\@: 33862306a36Sopenharmony_ci test %r13, %r13 33962306a36Sopenharmony_ci je .L_zero_cipher_left\@ 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci sub $128, %r13 34262306a36Sopenharmony_ci je .L_eight_cipher_left\@ 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci vmovd %xmm9, %r15d 34862306a36Sopenharmony_ci and $255, %r15d 34962306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci.L_encrypt_by_8_new\@: 35362306a36Sopenharmony_ci cmp $(255-8), %r15d 35462306a36Sopenharmony_ci jg .L_encrypt_by_8\@ 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci add $8, %r15b 35962306a36Sopenharmony_ci \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC 36062306a36Sopenharmony_ci add $128, %r11 36162306a36Sopenharmony_ci sub $128, %r13 36262306a36Sopenharmony_ci jne .L_encrypt_by_8_new\@ 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 36562306a36Sopenharmony_ci jmp .L_eight_cipher_left\@ 
36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci.L_encrypt_by_8\@: 36862306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 36962306a36Sopenharmony_ci add $8, %r15b 37062306a36Sopenharmony_ci \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC 37162306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 37262306a36Sopenharmony_ci add $128, %r11 37362306a36Sopenharmony_ci sub $128, %r13 37462306a36Sopenharmony_ci jne .L_encrypt_by_8_new\@ 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci.L_eight_cipher_left\@: 38262306a36Sopenharmony_ci \GHASH_LAST_8 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci.L_zero_cipher_left\@: 38662306a36Sopenharmony_ci vmovdqu %xmm14, AadHash(arg2) 38762306a36Sopenharmony_ci vmovdqu %xmm9, CurCount(arg2) 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci # check for 0 length 39062306a36Sopenharmony_ci mov arg5, %r13 39162306a36Sopenharmony_ci and $15, %r13 # r13 = (arg5 mod 16) 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci je .L_multiple_of_16_bytes\@ 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci # handle the last <16 Byte block separately 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci mov %r13, PBlockLen(arg2) 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn 40062306a36Sopenharmony_ci vmovdqu %xmm9, CurCount(arg2) 40162306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn) 40462306a36Sopenharmony_ci vmovdqu 
%xmm9, PBlockEncKey(arg2) 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci cmp $16, arg5 40762306a36Sopenharmony_ci jge .L_large_enough_update\@ 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci lea (arg4,%r11,1), %r10 41062306a36Sopenharmony_ci mov %r13, %r12 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci READ_PARTIAL_BLOCK %r10 %r12 %xmm1 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci lea SHIFT_MASK+16(%rip), %r12 41562306a36Sopenharmony_ci sub %r13, %r12 # adjust the shuffle mask pointer to be 41662306a36Sopenharmony_ci # able to shift 16-r13 bytes (r13 is the 41762306a36Sopenharmony_ci # number of bytes in plaintext mod 16) 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci jmp .L_final_ghash_mul\@ 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci.L_large_enough_update\@: 42262306a36Sopenharmony_ci sub $16, %r11 42362306a36Sopenharmony_ci add %r13, %r11 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci # receive the last <16 Byte block 42662306a36Sopenharmony_ci vmovdqu (arg4, %r11, 1), %xmm1 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci sub %r13, %r11 42962306a36Sopenharmony_ci add $16, %r11 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci lea SHIFT_MASK+16(%rip), %r12 43262306a36Sopenharmony_ci # adjust the shuffle mask pointer to be able to shift 16-r13 bytes 43362306a36Sopenharmony_ci # (r13 is the number of bytes in plaintext mod 16) 43462306a36Sopenharmony_ci sub %r13, %r12 43562306a36Sopenharmony_ci # get the appropriate shuffle mask 43662306a36Sopenharmony_ci vmovdqu (%r12), %xmm2 43762306a36Sopenharmony_ci # shift right 16-r13 bytes 43862306a36Sopenharmony_ci vpshufb %xmm2, %xmm1, %xmm1 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci.L_final_ghash_mul\@: 44162306a36Sopenharmony_ci .if \ENC_DEC == DEC 44262306a36Sopenharmony_ci vmovdqa %xmm1, %xmm2 44362306a36Sopenharmony_ci vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) 44462306a36Sopenharmony_ci vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the 
appropriate mask to 44562306a36Sopenharmony_ci # mask out top 16-r13 bytes of xmm9 44662306a36Sopenharmony_ci vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 44762306a36Sopenharmony_ci vpand %xmm1, %xmm2, %xmm2 44862306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 44962306a36Sopenharmony_ci vpxor %xmm2, %xmm14, %xmm14 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci vmovdqu %xmm14, AadHash(arg2) 45262306a36Sopenharmony_ci .else 45362306a36Sopenharmony_ci vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) 45462306a36Sopenharmony_ci vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to 45562306a36Sopenharmony_ci # mask out top 16-r13 bytes of xmm9 45662306a36Sopenharmony_ci vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 45762306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 45862306a36Sopenharmony_ci vpxor %xmm9, %xmm14, %xmm14 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci vmovdqu %xmm14, AadHash(arg2) 46162306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext 46262306a36Sopenharmony_ci .endif 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci ############################# 46662306a36Sopenharmony_ci # output r13 Bytes 46762306a36Sopenharmony_ci vmovq %xmm9, %rax 46862306a36Sopenharmony_ci cmp $8, %r13 46962306a36Sopenharmony_ci jle .L_less_than_8_bytes_left\@ 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci mov %rax, (arg3 , %r11) 47262306a36Sopenharmony_ci add $8, %r11 47362306a36Sopenharmony_ci vpsrldq $8, %xmm9, %xmm9 47462306a36Sopenharmony_ci vmovq %xmm9, %rax 47562306a36Sopenharmony_ci sub $8, %r13 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci.L_less_than_8_bytes_left\@: 47862306a36Sopenharmony_ci movb %al, (arg3 , %r11) 47962306a36Sopenharmony_ci add $1, %r11 48062306a36Sopenharmony_ci shr $8, %rax 48162306a36Sopenharmony_ci sub $1, %r13 48262306a36Sopenharmony_ci jne 
.L_less_than_8_bytes_left\@ 48362306a36Sopenharmony_ci ############################# 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci.L_multiple_of_16_bytes\@: 48662306a36Sopenharmony_ci.endm 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci# GCM_COMPLETE Finishes update of tag of last partial block 49062306a36Sopenharmony_ci# Output: Authorization Tag (AUTH_TAG) 49162306a36Sopenharmony_ci# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 49262306a36Sopenharmony_ci.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN 49362306a36Sopenharmony_ci vmovdqu AadHash(arg2), %xmm14 49462306a36Sopenharmony_ci vmovdqu HashKey(arg2), %xmm13 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci mov PBlockLen(arg2), %r12 49762306a36Sopenharmony_ci test %r12, %r12 49862306a36Sopenharmony_ci je .L_partial_done\@ 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci #GHASH computation for the last <16 Byte block 50162306a36Sopenharmony_ci \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci.L_partial_done\@: 50462306a36Sopenharmony_ci mov AadLen(arg2), %r12 # r12 = aadLen (number of bytes) 50562306a36Sopenharmony_ci shl $3, %r12 # convert into number of bits 50662306a36Sopenharmony_ci vmovd %r12d, %xmm15 # len(A) in xmm15 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci mov InLen(arg2), %r12 50962306a36Sopenharmony_ci shl $3, %r12 # len(C) in bits (*128) 51062306a36Sopenharmony_ci vmovq %r12, %xmm1 51162306a36Sopenharmony_ci vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 51262306a36Sopenharmony_ci vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci vpxor %xmm15, %xmm14, %xmm14 51562306a36Sopenharmony_ci \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation 51662306a36Sopenharmony_ci vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap 
51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci vmovdqu OrigIV(arg2), %xmm9 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Y0) 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci vpxor %xmm14, %xmm9, %xmm9 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci.L_return_T\@: 52762306a36Sopenharmony_ci mov \AUTH_TAG, %r10 # r10 = authTag 52862306a36Sopenharmony_ci mov \AUTH_TAG_LEN, %r11 # r11 = auth_tag_len 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci cmp $16, %r11 53162306a36Sopenharmony_ci je .L_T_16\@ 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci cmp $8, %r11 53462306a36Sopenharmony_ci jl .L_T_4\@ 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci.L_T_8\@: 53762306a36Sopenharmony_ci vmovq %xmm9, %rax 53862306a36Sopenharmony_ci mov %rax, (%r10) 53962306a36Sopenharmony_ci add $8, %r10 54062306a36Sopenharmony_ci sub $8, %r11 54162306a36Sopenharmony_ci vpsrldq $8, %xmm9, %xmm9 54262306a36Sopenharmony_ci test %r11, %r11 54362306a36Sopenharmony_ci je .L_return_T_done\@ 54462306a36Sopenharmony_ci.L_T_4\@: 54562306a36Sopenharmony_ci vmovd %xmm9, %eax 54662306a36Sopenharmony_ci mov %eax, (%r10) 54762306a36Sopenharmony_ci add $4, %r10 54862306a36Sopenharmony_ci sub $4, %r11 54962306a36Sopenharmony_ci vpsrldq $4, %xmm9, %xmm9 55062306a36Sopenharmony_ci test %r11, %r11 55162306a36Sopenharmony_ci je .L_return_T_done\@ 55262306a36Sopenharmony_ci.L_T_123\@: 55362306a36Sopenharmony_ci vmovd %xmm9, %eax 55462306a36Sopenharmony_ci cmp $2, %r11 55562306a36Sopenharmony_ci jl .L_T_1\@ 55662306a36Sopenharmony_ci mov %ax, (%r10) 55762306a36Sopenharmony_ci cmp $2, %r11 55862306a36Sopenharmony_ci je .L_return_T_done\@ 55962306a36Sopenharmony_ci add $2, %r10 56062306a36Sopenharmony_ci sar $16, %eax 56162306a36Sopenharmony_ci.L_T_1\@: 56262306a36Sopenharmony_ci mov %al, (%r10) 56362306a36Sopenharmony_ci jmp .L_return_T_done\@ 56462306a36Sopenharmony_ci 
.L_T_16\@:
        vmovdqu %xmm9, (%r10)           # store all 16 bytes of xmm9 at (%r10)

.L_return_T_done\@:
.endm

# CALC_AAD_HASH: GHASH the additional authenticated data and store the
# result in AadHash(arg2).
#   GHASH_MUL   multiply macro, invoked as: GHASH_MUL GH, HK, T1, T2, T3, T4, T5
#   AAD         pointer to the AAD
#   AADLEN      AAD length in bytes
#   T2          passed to GHASH_MUL as its HK operand
#   T7, T8      current block / running hash
#   T1, T3-T6   scratch
# Clobbers r10, r11, r12, eax and the flags.
.macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8

        mov     \AAD, %r10                      # r10 = AAD
        mov     \AADLEN, %r12                   # r12 = aadLen


        mov     %r12, %r11                      # r11 = bytes still to read

        vpxor   \T8, \T8, \T8                   # running hash = 0
        vpxor   \T7, \T7, \T7
        cmp     $16, %r11
        jl      .L_get_AAD_rest8\@
.L_get_AAD_blocks\@:
        # one full 16-byte block per iteration
        vmovdqu (%r10), \T7
        vpshufb SHUF_MASK(%rip), \T7, \T7
        vpxor   \T7, \T8, \T8
        \GHASH_MUL       \T8, \T2, \T1, \T3, \T4, \T5, \T6
        add     $16, %r10
        sub     $16, %r12
        sub     $16, %r11
        cmp     $16, %r11
        jge     .L_get_AAD_blocks\@
        vmovdqu \T8, \T7
        test    %r11, %r11
        je      .L_get_AAD_done\@               # length was a multiple of 16

        vpxor   \T7, \T7, \T7

        /* read the last <16B of AAD. since we have at least 4B of
        data right after the AAD (the ICV, and maybe some CT), we can
        read 4B/8B blocks safely, and then get rid of the extra stuff */
        # NOTE(review): the 8B/4B loads below round the tail up to a multiple
        # of 4, so they may read up to 3 bytes past the end of the AAD; this
        # relies on the caller guarantee stated above.
.L_get_AAD_rest8\@:
        cmp     $4, %r11
        jle     .L_get_AAD_rest4\@
        vmovq   (%r10), \T1                     # VEX form, matches the rest of the macro
        add     $8, %r10
        sub     $8, %r11
        vpslldq $8, \T1, \T1
        vpsrldq $8, \T7, \T7
        vpxor   \T1, \T7, \T7
        jmp     .L_get_AAD_rest8\@
.L_get_AAD_rest4\@:
        test    %r11, %r11
        jle     .L_get_AAD_rest0\@
        mov     (%r10), %eax
        vmovq   %rax, \T1                       # VEX form, matches the rest of the macro
        add     $4, %r10
        sub     $4, %r11
        vpslldq $12, \T1, \T1
        vpsrldq $4, \T7, \T7
        vpxor   \T1, \T7, \T7
.L_get_AAD_rest0\@:
        /* finalize: shift out the extra bytes we read, and align
        left. since pslldq can only shift by an immediate, we use
        vpshufb and a pair of shuffle masks */
        # r12 still holds the number of leftover (<16) AAD bytes here
        leaq    ALL_F(%rip), %r11
        subq    %r12, %r11
        vmovdqu 16(%r11), \T1                   # byte mask read at ALL_F + 16 - r12
        andq    $~3, %r11                       # round the table pointer down to 4 bytes
        vpshufb (%r11), \T7, \T7
        vpand   \T1, \T7, \T7
.L_get_AAD_rest_final\@:
        vpshufb SHUF_MASK(%rip), \T7, \T7
        vpxor   \T8, \T7, \T7
        \GHASH_MUL       \T7, \T2, \T1, \T3, \T4, \T5, \T6

.L_get_AAD_done\@:
        vmovdqu \T7, AadHash(arg2)
.endm

# INIT: initialise the context at arg2 for a new message.
#   arg3 = IV pointer, arg4 = hash key pointer, arg5 = AAD pointer,
#   arg6 = AAD length (as used by the code below)
#   GHASH_MUL / PRECOMPUTE are the multiply and key-power macros to invoke.
# Stores AadLen, InLen, PBlockLen, PBlockEncKey, OrigIV, CurCount and
# HashKey, then hashes the AAD and runs PRECOMPUTE.
.macro INIT GHASH_MUL PRECOMPUTE
        mov     arg6, %r11
        mov     %r11, AadLen(arg2)              # ctx_data.aad_length = aad_length
        xor     %r11d, %r11d
        mov     %r11, InLen(arg2)               # ctx_data.in_length = 0

        mov     %r11, PBlockLen(arg2)           # ctx_data.partial_block_length = 0
        mov     %r11, PBlockEncKey(arg2)        # ctx_data.partial_block_enc_key = 0
        mov     arg3, %rax
        vmovdqu (%rax), %xmm0
        vmovdqu %xmm0, OrigIV(arg2)             # ctx_data.orig_IV = iv

        vpshufb SHUF_MASK(%rip), %xmm0, %xmm0
        vmovdqu %xmm0, CurCount(arg2)           # ctx_data.current_counter = iv

        vmovdqu (arg4), %xmm6                   # xmm6 = HashKey

        vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
        ###############  PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
        vmovdqa %xmm6, %xmm2
        vpsllq  $1, %xmm6, %xmm6
        vpsrlq  $63, %xmm2, %xmm2               # bit shifted out of each qword
        vmovdqa %xmm2, %xmm1
        vpslldq $8, %xmm2, %xmm2                # carry low qword -> high qword
        vpsrldq $8, %xmm1, %xmm1                # carry out of the high qword
        vpor    %xmm2, %xmm6, %xmm6
        #reduction
        vpshufd $0b00100100, %xmm1, %xmm2
        vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
        vpand   POLY(%rip), %xmm2, %xmm2
        vpxor   %xmm2, %xmm6, %xmm6             # xmm6 holds the HashKey<<1 mod poly
        #######################################################################
        vmovdqu %xmm6, HashKey(arg2)            # store HashKey<<1 mod poly

        CALC_AAD_HASH \GHASH_MUL, arg5, arg6, %xmm2, %xmm6, %xmm3, %xmm4, %xmm5, %xmm7, %xmm1, %xmm0

        \PRECOMPUTE  %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
.endm


# Reads DLEN bytes starting at DPTR and stores in XMMDst
# where 0 < DLEN < 16
# Clobbers %rax, DLEN
# Only bytes inside [DPTR, DPTR+DLEN) are loaded: one 8-byte load when
# DLEN >= 8, everything else byte by byte from the highest address down.
.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst
        vpxor \XMMDst, \XMMDst, \XMMDst

        cmp $8, \DLEN
        jl .L_read_lt8_\@
        mov (\DPTR), %rax
        vpinsrq $0, %rax, \XMMDst, \XMMDst
        sub $8, \DLEN
        jz .L_done_read_partial_block_\@
        xor %eax, %eax
.L_read_next_byte_\@:
        shl $8, %rax
        mov 7(\DPTR, \DLEN, 1), %al             # byte at offset 8 + (DLEN - 1)
        dec \DLEN
        jnz .L_read_next_byte_\@
        vpinsrq $1, %rax, \XMMDst, \XMMDst
        jmp .L_done_read_partial_block_\@
.L_read_lt8_\@:
        xor %eax, %eax
.L_read_next_byte_lt8_\@:
        shl $8, %rax
        mov -1(\DPTR, \DLEN, 1), %al            # byte at offset DLEN - 1
        dec \DLEN
        jnz .L_read_next_byte_lt8_\@
        vpinsrq $0, %rax, \XMMDst, \XMMDst
.L_done_read_partial_block_\@:
.endm

# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
# between update calls.
# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
        AAD_HASH ENC_DEC
        mov     PBlockLen(arg2), %r13
        test    %r13, %r13
        je      .L_partial_block_done_\@        # Leave Macro if no partial blocks
        # Read in input data without over reading
        cmp     $16, \PLAIN_CYPH_LEN
        jl      .L_fewer_than_16_bytes_\@
        vmovdqu (\PLAIN_CYPH_IN), %xmm1         # 16 bytes or more: just fill xmm
        jmp     .L_data_read_\@

.L_fewer_than_16_bytes_\@:
        lea     (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
        mov     \PLAIN_CYPH_LEN, %r12
        READ_PARTIAL_BLOCK %r10 %r12 %xmm1

        mov     PBlockLen(arg2), %r13

.L_data_read_\@:                                # Finished reading in data

        vmovdqu PBlockEncKey(arg2), %xmm9
        vmovdqu HashKey(arg2), %xmm13

        lea     SHIFT_MASK(%rip), %r12

        # adjust the shuffle mask pointer to be able to shift r13 bytes
        # (r13 is the number of bytes already held in the partial block)
        add     %r13, %r12
        vmovdqu (%r12), %xmm2                   # get the appropriate shuffle mask
        vpshufb %xmm2, %xmm9, %xmm9             # shift right r13 bytes

.if  \ENC_DEC ==  DEC
        vmovdqa %xmm1, %xmm3                    # keep the ciphertext for GHASH
        vpxor   %xmm1, %xmm9, %xmm9             # Cyphertext XOR E(K, Yn)

        mov     \PLAIN_CYPH_LEN, %r10
        add     %r13, %r10
        # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
        sub     $16, %r10
        # Determine if partial block is not being filled and
        # shift mask accordingly
        jge     .L_no_extra_mask_1_\@
        sub     %r10, %r12
.L_no_extra_mask_1_\@:

        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1
        # get the appropriate mask to mask out bottom r13 bytes of xmm9
        vpand   %xmm1, %xmm9, %xmm9             # mask out bottom r13 bytes of xmm9

        vpand   %xmm1, %xmm3, %xmm3
        vmovdqa SHUF_MASK(%rip), %xmm10
        vpshufb %xmm10, %xmm3, %xmm3
        vpshufb %xmm2, %xmm3, %xmm3
        vpxor   %xmm3, \AAD_HASH, \AAD_HASH

        test    %r10, %r10
        jl      .L_partial_incomplete_1_\@

        # GHASH computation for the last <16 Byte block
        \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
        xor     %eax,%eax

        mov     %rax, PBlockLen(arg2)           # block completed: nothing pending
        jmp     .L_dec_done_\@
.L_partial_incomplete_1_\@:
        add     \PLAIN_CYPH_LEN, PBlockLen(arg2)
.L_dec_done_\@:
        vmovdqu \AAD_HASH, AadHash(arg2)
.else
        vpxor   %xmm1, %xmm9, %xmm9             # Plaintext XOR E(K, Yn)

        mov     \PLAIN_CYPH_LEN, %r10
        add     %r13, %r10
        # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
        sub     $16, %r10
        # Determine if partial block is not being filled and
        # shift mask accordingly
        jge     .L_no_extra_mask_2_\@
        sub     %r10, %r12
.L_no_extra_mask_2_\@:

        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1
        # get the appropriate mask to mask out bottom r13 bytes of xmm9
        vpand   %xmm1, %xmm9, %xmm9

        vmovdqa SHUF_MASK(%rip), %xmm1
        vpshufb %xmm1, %xmm9, %xmm9
        vpshufb %xmm2, %xmm9, %xmm9
        vpxor   %xmm9, \AAD_HASH, \AAD_HASH

        test    %r10, %r10
        jl      .L_partial_incomplete_2_\@

        # GHASH computation for the last <16 Byte block
        \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
        xor     %eax,%eax

        mov     %rax, PBlockLen(arg2)           # block completed: nothing pending
        jmp     .L_encode_done_\@
.L_partial_incomplete_2_\@:
        add     \PLAIN_CYPH_LEN, PBlockLen(arg2)
.L_encode_done_\@:
        vmovdqu \AAD_HASH, AadHash(arg2)

        vmovdqa SHUF_MASK(%rip), %xmm10
        # shuffle xmm9 back to output as ciphertext
        vpshufb %xmm10, %xmm9, %xmm9
        vpshufb %xmm2, %xmm9, %xmm9
.endif
        # output encrypted Bytes
        test    %r10, %r10
        jl      .L_partial_fill_\@
        mov     %r13, %r12
        mov     $16, %r13
        # Set r13 to be the number of bytes to write out
        sub     %r12, %r13
        jmp     .L_count_set_\@
.L_partial_fill_\@:
        mov     \PLAIN_CYPH_LEN, %r13
.L_count_set_\@:
        vmovdqa %xmm9, %xmm0
        vmovq   %xmm0, %rax
        cmp     $8, %r13
        jle     .L_less_than_8_bytes_left_\@

        mov     %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
        add     $8, \DATA_OFFSET
        vpsrldq $8, %xmm0, %xmm0                # VEX form, matches the rest of the macro
        vmovq   %xmm0, %rax
        sub     $8, %r13
.L_less_than_8_bytes_left_\@:
        # write the remaining bytes one at a time, lowest byte of rax first
        movb    %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
        add     $1, \DATA_OFFSET
        shr     $8, %rax
        sub     $1, %r13
        jne     .L_less_than_8_bytes_left_\@
.L_partial_block_done_\@:
.endm # PARTIAL_BLOCK

###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
# Input: A and B (128-bits each, bit-reflected)
# Output: C = A*B*x mod poly, (i.e. >>1 )
# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
###############################################################################
# GHASH_MUL_AVX GH, HK, T1..T5
#   GH  in/out: multiplicand, overwritten with the reduced product
#   HK  in:     multiplier (only read)
#   T1-T5       scratch, clobbered
# Karatsuba carry-less multiply (three vpclmulqdq) followed by a two-phase
# shift/xor reduction.
.macro  GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5

        vpshufd         $0b01001110, \GH, \T2   # swap the 64-bit halves
        vpshufd         $0b01001110, \HK, \T3
        vpxor           \GH, \T2, \T2           # T2 = (a1+a0)
        vpxor           \HK, \T3, \T3           # T3 = (b1+b0)

        vpclmulqdq      $0x11, \HK, \GH, \T1    # T1 = a1*b1
        vpclmulqdq      $0x00, \HK, \GH, \GH    # GH = a0*b0
        vpclmulqdq      $0x00, \T3, \T2, \T2    # T2 = (a1+a0)*(b1+b0)
        vpxor           \GH, \T2, \T2
        vpxor           \T1, \T2, \T2           # T2 = a0*b1+a1*b0

        vpslldq         $8, \T2, \T3            # shift-L T3 2 DWs
        vpsrldq         $8, \T2, \T2            # shift-R T2 2 DWs
        vpxor           \T3, \GH, \GH
        vpxor           \T2, \T1, \T1           # <T1:GH> = GH x HK

        #first phase of the reduction
        vpslld          $31, \GH, \T2           # packed left shift << 31
        vpslld          $30, \GH, \T3           # packed left shift << 30
        vpslld          $25, \GH, \T4           # packed left shift << 25

        vpxor           \T3, \T2, \T2           # xor the shifted versions
        vpxor           \T4, \T2, \T2

        vpsrldq         $4, \T2, \T5            # shift-R T5 1 DW

        vpslldq         $12, \T2, \T2           # shift-L T2 3 DWs
        vpxor           \T2, \GH, \GH           # first phase of the reduction complete

        #second phase of the reduction

        vpsrld          $1, \GH, \T2            # packed right shift >> 1
        vpsrld          $2, \GH, \T3            # packed right shift >> 2
        vpsrld          $7, \GH, \T4            # packed right shift >> 7
        vpxor           \T3, \T2, \T2           # xor the shifted versions
        vpxor           \T4, \T2, \T2

        vpxor           \T5, \T2, \T2
        vpxor           \T2, \GH, \GH
        vpxor           \T1, \GH, \GH           # the result is in GH


.endm

# PRECOMPUTE_AVX HK, T1..T6
# Starting from HK, repeatedly multiplies by HK with GHASH_MUL_AVX and stores
# the successive results in HashKey_2 .. HashKey_8 of the context at arg2.
# For HK and each stored power, the xor of its two 64-bit halves is stored
# in the matching HashKey_k / HashKey_i_k slot.
# HK is only read; T1-T6 are clobbered.
.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6

        # HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
        vmovdqa  \HK, \T5

        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^2<<1 mod poly
        vmovdqu  \T5, HashKey_2(arg2)                    #  [HashKey_2] = HashKey^2<<1 mod poly
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_2_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^3<<1 mod poly
        vmovdqu  \T5, HashKey_3(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_3_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^4<<1 mod poly
        vmovdqu  \T5, HashKey_4(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_4_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^5<<1 mod poly
        vmovdqu  \T5, HashKey_5(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_5_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^6<<1 mod poly
        vmovdqu  \T5, HashKey_6(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_6_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^7<<1 mod poly
        vmovdqu  \T5, HashKey_7(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_7_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2  #  T5 = HashKey^8<<1 mod poly
        vmovdqu  \T5, HashKey_8(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_8_k(arg2)

.endm

## if a = number of total plaintext bytes
## b = floor(a/16)
## num_initial_blocks = b mod 4
##   NOTE(review): the modulus is set by the caller, which is not shown here - confirm
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
## arg1, arg2, arg3, arg4 are used as
## pointers only, not modified

# INITIAL_BLOCKS_AVX
#   REP                 number of vaesenc rounds issued before vaesenclast
#   num_initial_blocks  how many single blocks to process up front
#   T1, T3-T6           scratch; T2 is passed to GHASH_MUL_AVX as its HK operand
#   CTR                 counter block, loaded from CurCount(arg2)
#   XMM1-XMM8           data registers (reg_i / reg_j are selected by setreg)
#   T_key               holds the current round key read from arg1
#   ENC_DEC             ENC or DEC; for DEC the input block is what gets hashed
# Reads input at (arg4, %r11), writes output at (arg3, %r11), and advances
# %r11 by 16 per block processed.
.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
        i = (8-\num_initial_blocks)
        setreg
        vmovdqu AadHash(arg2), reg_i            # current hash goes in the register
                                                # just before the first data block

        # start AES for num_initial_blocks blocks
        vmovdqu CurCount(arg2), \CTR

        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
                vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
                vmovdqa \CTR, reg_i
                vpshufb SHUF_MASK(%rip), reg_i, reg_i   # perform a 16Byte swap
        i = (i+1)
        setreg
.endr

        vmovdqa  (arg1), \T_key                 # round key 0
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
                vpxor   \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        j = 1
        setreg
.rep \REP
        vmovdqa  16*j(arg1), \T_key             # round key j
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenc \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        j = (j+1)
        setreg
.endr

        vmovdqa  16*j(arg1), \T_key             # last round key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenclast      \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
                vmovdqu (arg4, %r11), \T1
                vpxor   \T1, reg_i, reg_i
                vmovdqu reg_i, (arg3 , %r11)           # write back ciphertext for num_initial_blocks blocks
                add     $16, %r11
.if  \ENC_DEC == DEC
                vmovdqa \T1, reg_i                     # decrypt: hash the input block
.endif
                vpshufb SHUF_MASK(%rip), reg_i, reg_i  # prepare ciphertext for GHASH computations
        i = (i+1)
        setreg
.endr


        i = (8-\num_initial_blocks)
        j = (9-\num_initial_blocks)
        setreg

.rep \num_initial_blocks
        vpxor    reg_i, reg_j, reg_j
        GHASH_MUL_AVX       reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
        i = (i+1)
        j = (j+1)
        setreg
.endr
        # XMM8 has the combined result here

        vmovdqa  \XMM8, TMP1(%rsp)
        vmovdqa  \XMM8, \T3

        cmp     $128, %r13
        jl      .L_initial_blocks_done\@                  # no need for precomputed constants

###############################################################################
# Build the next 8 counter blocks in XMM1..XMM8 and encrypt them
                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM1
                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM2
                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM3
                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM4
                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM5
                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM6
                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM7
                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM8
                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8  # perform a 16Byte swap

                vmovdqa  (arg1), \T_key                 # round key 0
                vpxor    \T_key, \XMM1, \XMM1
                vpxor    \T_key, \XMM2, \XMM2
                vpxor    \T_key, \XMM3, \XMM3
                vpxor    \T_key, \XMM4, \XMM4
                vpxor    \T_key, \XMM5, \XMM5
                vpxor    \T_key, \XMM6, \XMM6
                vpxor    \T_key, \XMM7, \XMM7
                vpxor    \T_key, \XMM8, \XMM8

                i = 1
                setreg
.rep    \REP       # do REP rounds
                vmovdqa  16*i(arg1), \T_key
                vaesenc  \T_key, \XMM1, \XMM1
                vaesenc  \T_key, \XMM2, \XMM2
                vaesenc  \T_key, \XMM3, \XMM3
                vaesenc  \T_key, \XMM4, \XMM4
                vaesenc  \T_key, \XMM5, \XMM5
                vaesenc  \T_key, \XMM6, \XMM6
                vaesenc  \T_key, \XMM7, \XMM7
                vaesenc  \T_key, \XMM8, \XMM8
                i = (i+1)
                setreg
.endr

                vmovdqa  16*i(arg1), \T_key             # last round key
                vaesenclast  \T_key, \XMM1, \XMM1
                vaesenclast  \T_key, \XMM2, \XMM2
                vaesenclast  \T_key, \XMM3, \XMM3
                vaesenclast  \T_key, \XMM4, \XMM4
                vaesenclast  \T_key, \XMM5, \XMM5
                vaesenclast  \T_key, \XMM6, \XMM6
                vaesenclast  \T_key, \XMM7, \XMM7
                vaesenclast  \T_key, \XMM8, \XMM8

                # xor each keystream block with its input block and store it;
                # for DEC keep the input block in the register for GHASH
                vmovdqu  (arg4, %r11), \T1
                vpxor    \T1, \XMM1, \XMM1
                vmovdqu  \XMM1, (arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM1
                .endif

                vmovdqu  16*1(arg4, %r11), \T1
                vpxor    \T1, \XMM2, \XMM2
                vmovdqu  \XMM2, 16*1(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM2
                .endif

                vmovdqu  16*2(arg4, %r11), \T1
                vpxor    \T1, \XMM3, \XMM3
                vmovdqu  \XMM3, 16*2(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM3
                .endif

                vmovdqu  16*3(arg4, %r11), \T1
                vpxor    \T1, \XMM4, \XMM4
                vmovdqu  \XMM4, 16*3(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM4
                .endif

                vmovdqu  16*4(arg4, %r11), \T1
                vpxor    \T1, \XMM5, \XMM5
                vmovdqu  \XMM5, 16*4(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM5
                .endif

                vmovdqu  16*5(arg4, %r11), \T1
                vpxor    \T1, \XMM6, \XMM6
                vmovdqu  \XMM6, 16*5(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM6
                .endif

                vmovdqu  16*6(arg4, %r11), \T1
                vpxor    \T1, \XMM7, \XMM7
                vmovdqu  \XMM7, 16*6(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM7
                .endif

                vmovdqu  16*7(arg4, %r11), \T1
                vpxor    \T1, \XMM8, \XMM8
                vmovdqu  \XMM8, 16*7(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM8
                .endif

                add     $128, %r11

                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1     # perform a 16Byte swap
                vpxor    TMP1(%rsp), \XMM1, \XMM1          # combine GHASHed value with the corresponding ciphertext
                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8     # perform a 16Byte swap

###############################################################################

.L_initial_blocks_done\@:

.endm
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
# arg1, arg2, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
#
# Macro arguments, as used by the body below:
#   REP      - number of vaesenc rounds applied before the final vaesenclast
#              (rounds 1-9 are unrolled, the remaining REP-9 come from a .rep)
#   T1-T7    - scratch registers; T6 holds the reduced GHASH value at the end
#   CTR      - counter block, incremented eight times and written back
#   XMM1-8   - in:  the 8 blocks to be hashed
#              out: the 8 new ciphertext blocks, byte-swapped, with the new
#                   GHASH value already XORed into XMM1
#   loop_idx - in_order: add ONE, then byte-swap every counter block
#              otherwise: add ONEf and skip the byte swap
#   ENC_DEC  - ENC or DEC
# Stack slots TMP2..TMP8 are overwritten.
# The AES rounds (on XMM1-8) and the GHASH multiplications (on T2 and the
# spilled TMP2..TMP8 blocks) are independent and are interleaved below.
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC

	# Free XMM1-8 for the new counter blocks: keep block 1 in T2 and
	# spill blocks 2-8 to the stack.
	vmovdqa	\XMM1, \T2
	vmovdqa	\XMM2, TMP2(%rsp)
	vmovdqa	\XMM3, TMP3(%rsp)
	vmovdqa	\XMM4, TMP4(%rsp)
	vmovdqa	\XMM5, TMP5(%rsp)
	vmovdqa	\XMM6, TMP6(%rsp)
	vmovdqa	\XMM7, TMP7(%rsp)
	vmovdqa	\XMM8, TMP8(%rsp)

.if \loop_idx == in_order
	vpaddd	ONE(%rip), \CTR, \XMM1			# INCR CNT
	vpaddd	ONE(%rip), \XMM1, \XMM2
	vpaddd	ONE(%rip), \XMM2, \XMM3
	vpaddd	ONE(%rip), \XMM3, \XMM4
	vpaddd	ONE(%rip), \XMM4, \XMM5
	vpaddd	ONE(%rip), \XMM5, \XMM6
	vpaddd	ONE(%rip), \XMM6, \XMM7
	vpaddd	ONE(%rip), \XMM7, \XMM8
	vmovdqa	\XMM8, \CTR

	vpshufb	SHUF_MASK(%rip), \XMM1, \XMM1		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM2, \XMM2		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM3, \XMM3		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM4, \XMM4		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM5, \XMM5		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM6, \XMM6		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM7, \XMM7		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM8, \XMM8		# perform a 16Byte swap
.else
	# NOTE(review): ONEf is presumably the increment in already
	# byte-swapped form, since this path skips the vpshufb - confirm
	# against its definition.
	vpaddd	ONEf(%rip), \CTR, \XMM1			# INCR CNT
	vpaddd	ONEf(%rip), \XMM1, \XMM2
	vpaddd	ONEf(%rip), \XMM2, \XMM3
	vpaddd	ONEf(%rip), \XMM3, \XMM4
	vpaddd	ONEf(%rip), \XMM4, \XMM5
	vpaddd	ONEf(%rip), \XMM5, \XMM6
	vpaddd	ONEf(%rip), \XMM6, \XMM7
	vpaddd	ONEf(%rip), \XMM7, \XMM8
	vmovdqa	\XMM8, \CTR
.endif


	#######################################################################

	# AES round 0: XOR every counter block with the first 16 bytes at arg1
	vmovdqu	(arg1), \T1
	vpxor	\T1, \XMM1, \XMM1
	vpxor	\T1, \XMM2, \XMM2
	vpxor	\T1, \XMM3, \XMM3
	vpxor	\T1, \XMM4, \XMM4
	vpxor	\T1, \XMM5, \XMM5
	vpxor	\T1, \XMM6, \XMM6
	vpxor	\T1, \XMM7, \XMM7
	vpxor	\T1, \XMM8, \XMM8

	#######################################################################

	# AES round 1
	vmovdqu	16*1(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8

	# AES round 2
	vmovdqu	16*2(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8


	#######################################################################

	# GHASH block 1 (saved in T2) with HashKey_8.  This first block
	# initialises the three accumulators: T4 (high), T7 (low), T6 (middle).
	vmovdqu	HashKey_8(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T2, \T4		# T4 = a1*b1
	vpclmulqdq	$0x00, \T5, \T2, \T7		# T7 = a0*b0

	vpshufd	$0b01001110, \T2, \T6
	vpxor	\T2, \T6, \T6

	vmovdqu	HashKey_8_k(arg2), \T5
	vpclmulqdq	$0x00, \T5, \T6, \T6

	# AES round 3
	vmovdqu	16*3(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8

	# GHASH block 2 (TMP2) with HashKey_7, accumulated into T4/T7/T6
	vmovdqa	TMP2(%rsp), \T1
	vmovdqu	HashKey_7(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T1, \T3
	vpxor	\T3, \T4, \T4
	vpclmulqdq	$0x00, \T5, \T1, \T3
	vpxor	\T3, \T7, \T7

	vpshufd	$0b01001110, \T1, \T3
	vpxor	\T1, \T3, \T3
	vmovdqu	HashKey_7_k(arg2), \T5
	vpclmulqdq	$0x10, \T5, \T3, \T3
	vpxor	\T3, \T6, \T6

	# AES round 4
	vmovdqu	16*4(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8

	#######################################################################

	# GHASH block 3 (TMP3) with HashKey_6
	vmovdqa	TMP3(%rsp), \T1
	vmovdqu	HashKey_6(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T1, \T3
	vpxor	\T3, \T4, \T4
	vpclmulqdq	$0x00, \T5, \T1, \T3
	vpxor	\T3, \T7, \T7

	vpshufd	$0b01001110, \T1, \T3
	vpxor	\T1, \T3, \T3
	vmovdqu	HashKey_6_k(arg2), \T5
	vpclmulqdq	$0x10, \T5, \T3, \T3
	vpxor	\T3, \T6, \T6

	# AES round 5
	vmovdqu	16*5(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8

	# GHASH block 4 (TMP4) with HashKey_5
	vmovdqa	TMP4(%rsp), \T1
	vmovdqu	HashKey_5(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T1, \T3
	vpxor	\T3, \T4, \T4
	vpclmulqdq	$0x00, \T5, \T1, \T3
	vpxor	\T3, \T7, \T7

	vpshufd	$0b01001110, \T1, \T3
	vpxor	\T1, \T3, \T3
	vmovdqu	HashKey_5_k(arg2), \T5
	vpclmulqdq	$0x10, \T5, \T3, \T3
	vpxor	\T3, \T6, \T6

	# AES round 6
	vmovdqu	16*6(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8


	# GHASH block 5 (TMP5) with HashKey_4
	vmovdqa	TMP5(%rsp), \T1
	vmovdqu	HashKey_4(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T1, \T3
	vpxor	\T3, \T4, \T4
	vpclmulqdq	$0x00, \T5, \T1, \T3
	vpxor	\T3, \T7, \T7

	vpshufd	$0b01001110, \T1, \T3
	vpxor	\T1, \T3, \T3
	vmovdqu	HashKey_4_k(arg2), \T5
	vpclmulqdq	$0x10, \T5, \T3, \T3
	vpxor	\T3, \T6, \T6

	# AES round 7
	vmovdqu	16*7(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8

	# GHASH block 6 (TMP6) with HashKey_3
	vmovdqa	TMP6(%rsp), \T1
	vmovdqu	HashKey_3(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T1, \T3
	vpxor	\T3, \T4, \T4
	vpclmulqdq	$0x00, \T5, \T1, \T3
	vpxor	\T3, \T7, \T7

	vpshufd	$0b01001110, \T1, \T3
	vpxor	\T1, \T3, \T3
	vmovdqu	HashKey_3_k(arg2), \T5
	vpclmulqdq	$0x10, \T5, \T3, \T3
	vpxor	\T3, \T6, \T6


	# AES round 8
	vmovdqu	16*8(arg1), \T1
	vaesenc	\T1, \XMM1, \XMM1
	vaesenc	\T1, \XMM2, \XMM2
	vaesenc	\T1, \XMM3, \XMM3
	vaesenc	\T1, \XMM4, \XMM4
	vaesenc	\T1, \XMM5, \XMM5
	vaesenc	\T1, \XMM6, \XMM6
	vaesenc	\T1, \XMM7, \XMM7
	vaesenc	\T1, \XMM8, \XMM8

	# GHASH block 7 (TMP7) with HashKey_2
	vmovdqa	TMP7(%rsp), \T1
	vmovdqu	HashKey_2(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T1, \T3
	vpxor	\T3, \T4, \T4
	vpclmulqdq	$0x00, \T5, \T1, \T3
	vpxor	\T3, \T7, \T7

	vpshufd	$0b01001110, \T1, \T3
	vpxor	\T1, \T3, \T3
	vmovdqu	HashKey_2_k(arg2), \T5
	vpclmulqdq	$0x10, \T5, \T3, \T3
	vpxor	\T3, \T6, \T6

	#######################################################################

	# AES round 9 (T5 is used for the round key from here on)
	vmovdqu	16*9(arg1), \T5
	vaesenc	\T5, \XMM1, \XMM1
	vaesenc	\T5, \XMM2, \XMM2
	vaesenc	\T5, \XMM3, \XMM3
	vaesenc	\T5, \XMM4, \XMM4
	vaesenc	\T5, \XMM5, \XMM5
	vaesenc	\T5, \XMM6, \XMM6
	vaesenc	\T5, \XMM7, \XMM7
	vaesenc	\T5, \XMM8, \XMM8

	# GHASH block 8 (TMP8) with HashKey
	vmovdqa	TMP8(%rsp), \T1
	vmovdqu	HashKey(arg2), \T5
	vpclmulqdq	$0x11, \T5, \T1, \T3
	vpxor	\T3, \T4, \T4
	vpclmulqdq	$0x00, \T5, \T1, \T3
	vpxor	\T3, \T7, \T7

	vpshufd	$0b01001110, \T1, \T3
	vpxor	\T1, \T3, \T3
	vmovdqu	HashKey_k(arg2), \T5
	vpclmulqdq	$0x10, \T5, \T3, \T3
	vpxor	\T3, \T6, \T6

	# Karatsuba: fold the high and low sums into the middle term
	vpxor	\T4, \T6, \T6
	vpxor	\T7, \T6, \T6

	vmovdqu	16*10(arg1), \T5

	# Remaining REP-9 full rounds.  Each iteration uses the key already in
	# T5 and then loads the next one, so after the loop T5 holds the key
	# for the final round.
	# NOTE(review): setreg is defined outside this chunk; it presumably
	# refreshes the reg_i/reg_j aliases from i and j.
	i = 11
	setreg
.rep (\REP-9)

	vaesenc	\T5, \XMM1, \XMM1
	vaesenc	\T5, \XMM2, \XMM2
	vaesenc	\T5, \XMM3, \XMM3
	vaesenc	\T5, \XMM4, \XMM4
	vaesenc	\T5, \XMM5, \XMM5
	vaesenc	\T5, \XMM6, \XMM6
	vaesenc	\T5, \XMM7, \XMM7
	vaesenc	\T5, \XMM8, \XMM8

	vmovdqu	16*i(arg1), \T5
	i = i + 1
	setreg
.endr

	# Final round.  The input block is XORed into the last round key first,
	# so vaesenclast produces (keystream XOR input) in one instruction.
	i = 0
	j = 1
	setreg
.rep 8
	vpxor	16*i(arg4, %r11), \T5, \T2
	.if \ENC_DEC == ENC
	vaesenclast	\T2, reg_j, reg_j
	.else
	# DEC: the result goes to the output buffer, and reg_j is reloaded
	# with the input block so that the input is what gets hashed next.
	vaesenclast	\T2, reg_j, \T3
	vmovdqu	16*i(arg4, %r11), reg_j
	vmovdqu	\T3, 16*i(arg3, %r11)
	.endif
	i = (i+1)
	j = (j+1)
	setreg
.endr
	#######################################################################


	vpslldq	$8, \T6, \T3				# T3 = T6 shifted left 2 DWs
	vpsrldq	$8, \T6, \T6				# T6 = T6 shifted right 2 DWs
	vpxor	\T3, \T7, \T7
	vpxor	\T4, \T6, \T6				# accumulate the results in T6:T7



	#######################################################################
	#first phase of the reduction
	#######################################################################
	vpslld	$31, \T7, \T2				# packed left shift << 31
	vpslld	$30, \T7, \T3				# packed left shift << 30
	vpslld	$25, \T7, \T4				# packed left shift << 25

	vpxor	\T3, \T2, \T2				# xor the shifted versions
	vpxor	\T4, \T2, \T2

	vpsrldq	$4, \T2, \T1				# shift-R T1 1 DW

	vpslldq	$12, \T2, \T2				# shift-L T2 3 DWs
	vpxor	\T2, \T7, \T7				# first phase of the reduction complete
	#######################################################################
	# ENC only: the DEC path already stored its output in the final-round
	# loop above.
	.if \ENC_DEC == ENC
	vmovdqu	\XMM1, 16*0(arg3,%r11)			# Write to the Ciphertext buffer
	vmovdqu	\XMM2, 16*1(arg3,%r11)			# Write to the Ciphertext buffer
	vmovdqu	\XMM3, 16*2(arg3,%r11)			# Write to the Ciphertext buffer
	vmovdqu	\XMM4, 16*3(arg3,%r11)			# Write to the Ciphertext buffer
	vmovdqu	\XMM5, 16*4(arg3,%r11)			# Write to the Ciphertext buffer
	vmovdqu	\XMM6, 16*5(arg3,%r11)			# Write to the Ciphertext buffer
	vmovdqu	\XMM7, 16*6(arg3,%r11)			# Write to the Ciphertext buffer
	vmovdqu	\XMM8, 16*7(arg3,%r11)			# Write to the Ciphertext buffer
	.endif

	#######################################################################
	#second phase of the reduction
	vpsrld	$1, \T7, \T2				# packed right shift >> 1
	vpsrld	$2, \T7, \T3				# packed right shift >> 2
	vpsrld	$7, \T7, \T4				# packed right shift >> 7
	vpxor	\T3, \T2, \T2				# xor the shifted versions
	vpxor	\T4, \T2, \T2

	vpxor	\T1, \T2, \T2
	vpxor	\T2, \T7, \T7
	vpxor	\T7, \T6, \T6				# the result is in T6
	#######################################################################

	# Byte-swap the 8 new ciphertext blocks so they are ready to be hashed
	vpshufb	SHUF_MASK(%rip), \XMM1, \XMM1		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM2, \XMM2		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM3, \XMM3		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM4, \XMM4		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM5, \XMM5		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM6, \XMM6		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM7, \XMM7		# perform a 16Byte swap
	vpshufb	SHUF_MASK(%rip), \XMM8, \XMM8		# perform a 16Byte swap


	# Fold the GHASH value computed above into the first new block
	vpxor	\T6, \XMM1, \XMM1



.endm


# GHASH the last 8
# ciphertext blocks.
#
# GHASH_LAST_8_AVX multiplies XMM1..XMM8 by HashKey_8..HashKey respectively
# (Karatsuba: three vpclmulqdq per block), XORs the eight products together
# and reduces the sum.  The result is left in T6.
#   T6   - accumulates the high products (a1*b1), then holds the result
#   T7   - accumulates the low products (a0*b0)
#   XMM1 - reused as the accumulator for the middle products
# Clobbers T1-T5, T7 and XMM1.  arg2 is only read.
.macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8

	## Karatsuba Method


	# block 1 * HashKey_8: initialises T6, T7 and the XMM1 accumulator
	vpshufd	$0b01001110, \XMM1, \T2
	vpxor	\XMM1, \T2, \T2
	vmovdqu	HashKey_8(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM1, \T6
	vpclmulqdq	$0x00, \T5, \XMM1, \T7

	vmovdqu	HashKey_8_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \XMM1

	######################

	# block 2 * HashKey_7
	vpshufd	$0b01001110, \XMM2, \T2
	vpxor	\XMM2, \T2, \T2
	vmovdqu	HashKey_7(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM2, \T4
	vpxor	\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM2, \T4
	vpxor	\T4, \T7, \T7

	vmovdqu	HashKey_7_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \T2
	vpxor	\T2, \XMM1, \XMM1

	######################

	# block 3 * HashKey_6
	vpshufd	$0b01001110, \XMM3, \T2
	vpxor	\XMM3, \T2, \T2
	vmovdqu	HashKey_6(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM3, \T4
	vpxor	\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM3, \T4
	vpxor	\T4, \T7, \T7

	vmovdqu	HashKey_6_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \T2
	vpxor	\T2, \XMM1, \XMM1

	######################

	# block 4 * HashKey_5
	vpshufd	$0b01001110, \XMM4, \T2
	vpxor	\XMM4, \T2, \T2
	vmovdqu	HashKey_5(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM4, \T4
	vpxor	\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM4, \T4
	vpxor	\T4, \T7, \T7

	vmovdqu	HashKey_5_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \T2
	vpxor	\T2, \XMM1, \XMM1

	######################

	# block 5 * HashKey_4
	vpshufd	$0b01001110, \XMM5, \T2
	vpxor	\XMM5, \T2, \T2
	vmovdqu	HashKey_4(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM5, \T4
	vpxor	\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM5, \T4
	vpxor	\T4, \T7, \T7

	vmovdqu	HashKey_4_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \T2
	vpxor	\T2, \XMM1, \XMM1

	######################

	# block 6 * HashKey_3
	vpshufd	$0b01001110, \XMM6, \T2
	vpxor	\XMM6, \T2, \T2
	vmovdqu	HashKey_3(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM6, \T4
	vpxor	\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM6, \T4
	vpxor	\T4, \T7, \T7

	vmovdqu	HashKey_3_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \T2
	vpxor	\T2, \XMM1, \XMM1

	######################

	# block 7 * HashKey_2
	vpshufd	$0b01001110, \XMM7, \T2
	vpxor	\XMM7, \T2, \T2
	vmovdqu	HashKey_2(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM7, \T4
	vpxor	\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM7, \T4
	vpxor	\T4, \T7, \T7

	vmovdqu	HashKey_2_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \T2
	vpxor	\T2, \XMM1, \XMM1

	######################

	# block 8 * HashKey
	vpshufd	$0b01001110, \XMM8, \T2
	vpxor	\XMM8, \T2, \T2
	vmovdqu	HashKey(arg2), \T5
	vpclmulqdq	$0x11, \T5, \XMM8, \T4
	vpxor	\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM8, \T4
	vpxor	\T4, \T7, \T7

	vmovdqu	HashKey_k(arg2), \T3
	vpclmulqdq	$0x00, \T3, \T2, \T2

	# Karatsuba: middle term = middle products ^ high sum ^ low sum
	vpxor	\T2, \XMM1, \XMM1
	vpxor	\T6, \XMM1, \XMM1
	vpxor	\T7, \XMM1, \T2




	# Split the middle term across the low (T7) and high (T6) halves
	vpslldq	$8, \T2, \T4
	vpsrldq	$8, \T2, \T2

	vpxor	\T4, \T7, \T7
	vpxor	\T2, \T6, \T6				# <T6:T7> holds the result of
							# the accumulated carry-less multiplications

	#######################################################################
	#first phase of the reduction
	vpslld	$31, \T7, \T2				# packed left shift << 31
	vpslld	$30, \T7, \T3				# packed left shift << 30
	vpslld	$25, \T7, \T4				# packed left shift << 25

	vpxor	\T3, \T2, \T2				# xor the shifted versions
	vpxor	\T4, \T2, \T2

	vpsrldq	$4, \T2, \T1				# shift-R T1 1 DW

	vpslldq	$12, \T2, \T2				# shift-L T2 3 DWs
	vpxor	\T2, \T7, \T7				# first phase of the reduction complete
	#######################################################################


	#second phase of the reduction
	vpsrld	$1, \T7, \T2				# packed right shift >> 1
	vpsrld	$2, \T7, \T3				# packed right shift >> 2
	vpsrld	$7, \T7, \T4				# packed right shift >> 7
	vpxor	\T3, \T2, \T2				# xor the shifted versions
	vpxor	\T4, \T2, \T2

	vpxor	\T1, \T2, \T2
	vpxor	\T2, \T7, \T7
	vpxor	\T7, \T6, \T6				# the result is in T6

.endm

#############################################################
#void   aesni_gcm_init_avx_gen2
#        (gcm_data     *my_ctx_data,
#         gcm_context_data *data,
#        u8     *hash_subkey, /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#			(from Security Association) concatenated with 8 byte
#			Initialisation Vector (from IPSec ESP Payload)
#			concatenated with 0x00000001. 16-byte aligned pointer. */
#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
#        u64     aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#
# NOTE(review): the parameter order above is kept from the original comment;
# confirm it against the C prototype.
# The body is the shared INIT macro, instantiated with the AVX (gen2) GHASH
# multiply and precompute macros, wrapped in FUNC_SAVE/FUNC_RESTORE.
#############################################################
SYM_FUNC_START(aesni_gcm_init_avx_gen2)
	FUNC_SAVE
	INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_init_avx_gen2)

###############################################################################
#void   aesni_gcm_enc_update_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
#        const   u8 *in, /* Plaintext input */
#        u64     plaintext_len) /* Length of data in Bytes for encryption.
#        */
#
# Dispatches on keysize (16, 32, anything else is treated as the 192-bit
# case) and expands GCM_ENC_DEC in ENC mode once per key size.  The trailing
# 9/11/13 argument is the per-key-size round count.
# NOTE(review): GCM_ENC_DEC is defined outside this chunk; the round count is
# presumably forwarded as REP to the block macros - confirm.
###############################################################################
SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2)
	FUNC_SAVE
	mov	keysize, %eax
	cmp	$32, %eax
	je	key_256_enc_update
	cmp	$16, %eax
	je	key_128_enc_update
	# must be 192
	GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
	FUNC_RESTORE
	RET
key_128_enc_update:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
	FUNC_RESTORE
	RET
key_256_enc_update:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)

###############################################################################
#void   aesni_gcm_dec_update_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
#        const   u8 *in, /* Ciphertext input */
#        u64     plaintext_len) /* Length of data in Bytes for decryption.
#        */
#
# Dispatches on keysize (16, 32, anything else is treated as the 192-bit
# case) and expands GCM_ENC_DEC in DEC mode once per key size.  The trailing
# 9/11/13 argument is the per-key-size round count.
# NOTE(review): GCM_ENC_DEC is defined outside this chunk; the round count is
# presumably forwarded as REP to the block macros - confirm.
###############################################################################
SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2)
	FUNC_SAVE
	mov	keysize,%eax
	cmp	$32, %eax
	je	key_256_dec_update
	cmp	$16, %eax
	je	key_128_dec_update
	# must be 192
	GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
	FUNC_RESTORE
	RET
key_128_dec_update:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
	FUNC_RESTORE
	RET
key_256_dec_update:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)

###############################################################################
#void   aesni_gcm_finalize_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) /* Authenticated Tag Length in bytes.
#				Valid values are 16 (most likely), 12 or 8.
*/
###############################################################################
SYM_FUNC_START(aesni_gcm_finalize_avx_gen2)
        FUNC_SAVE
        # Pick the GCM_COMPLETE expansion that matches the key length in bytes.
        # arg3/arg4 are forwarded as the tag buffer and tag length operands.
        movl    keysize, %eax
        cmpl    $16, %eax
        je      key_128_finalize
        cmpl    $32, %eax
        je      key_256_finalize
        # neither 16 nor 32 bytes: handled as a 192-bit key
        GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
        FUNC_RESTORE
        RET
key_128_finalize:
        # 128-bit key
        GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
        FUNC_RESTORE
        RET
key_256_finalize:
        # 256-bit key
        GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
        FUNC_RESTORE
        RET
SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)

###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
# Input: A and B (128-bits each, bit-reflected)
# Output: C = A*B*x mod poly, (i.e. >>1 )
# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
###############################################################################
# GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5
#   GH      in/out: multiplicand on entry, reduced 128-bit product on exit
#   HK      in:     hash key operand, not written
#   T1-T3   scratch, overwritten
#   T4, T5  accepted but not referenced anywhere in this macro body
# Four 64x64 carry-less multiplies build the 256-bit product in T1:GH, which
# is then folded back to 128 bits in two phases using the POLY2 constant.
.macro  GHASH_MUL_AVX2      GH HK T1 T2 T3 T4 T5

        vpclmulqdq      $0x11,\HK,\GH,\T1      # T1 = a1*b1
        vpclmulqdq      $0x00,\HK,\GH,\T2      # T2 = a0*b0
        vpclmulqdq      $0x01,\HK,\GH,\T3      # T3 = a1*b0
        vpclmulqdq      $0x10,\HK,\GH,\GH      # GH = a0*b1
        vpxor           \T3, \GH, \GH           # GH = sum of the two middle products


        # split the middle sum: its upper half goes into the high product
        # (T1), its lower half is moved up and joined with the low product
        vpsrldq         $8 , \GH, \T3          # shift-R GH 2 DWs
        vpslldq         $8 , \GH, \GH          # shift-L GH 2 DWs

        vpxor           \T3, \T1, \T1
        vpxor           \T2, \GH, \GH           # T1:GH now holds the 256-bit product

        #######################################################################
        #first phase of the reduction
        vmovdqa         POLY2(%rip), \T3

        vpclmulqdq      $0x01, \GH, \T3, \T2
        vpslldq         $8, \T2, \T2           # shift-L T2 2 DWs

        vpxor           \T2, \GH, \GH          # first phase of the reduction complete
        #######################################################################
        #second phase of the reduction
        vpclmulqdq      $0x00, \GH, \T3, \T2
        vpsrldq         $4, \T2, \T2           # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

        vpclmulqdq      $0x10, \GH, \T3, \GH
        vpslldq         $4, \GH, \GH           # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts)

        vpxor           \T2, \GH, \GH          # second phase of the reduction complete
        #######################################################################
        vpxor           \T1, \GH, \GH          # the result is in GH


.endm

# PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6
#   HK      in:     hash key (HashKey<<1 mod poly per the comments below)
#   T1-T6   scratch; T5 carries the running power of the hash key
# Repeatedly multiplies T5 by HK and stores the successive results to
# HashKey_2(arg2) through HashKey_8(arg2).
.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6

        # Only the key powers themselves are stored by this macro; no separate
        # values combining the low and high halves of a key power are written.
        vmovdqa  \HK, \T5
        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^2<<1 mod poly
        vmovdqu  \T5, HashKey_2(arg2)                       #  [HashKey_2] = HashKey^2<<1 mod poly

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^3<<1 mod poly
        vmovdqu  \T5, HashKey_3(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^4<<1 mod poly
        vmovdqu  \T5, HashKey_4(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^5<<1 mod poly
        vmovdqu  \T5, HashKey_5(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^6<<1 mod poly
        vmovdqu  \T5, HashKey_6(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^7<<1 mod poly
        vmovdqu  \T5, HashKey_7(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2    #  T5 = HashKey^8<<1 mod poly
        vmovdqu  \T5, HashKey_8(arg2)

.endm

## INITIAL_BLOCKS_AVX2
## with a = number of total plaintext bytes
##      b = floor(a/16)
## num_initial_blocks = b mod 4
## NOTE(review): the register indexing below is based on 8 and 9 rather than
## 4, so the caller may pass up to 7 here -- confirm against the invoking macro.
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
## NOTE(review): the body below only writes r11 directly (plus flags); setreg
## is defined elsewhere -- confirm the clobber list above.
## arg1, arg2, arg3, arg4 are used as pointers only, not modified
##
## Operands as used below:
##   REP                 number of vaesenc rounds run between the initial key
##                       xor and the final vaesenclast
##   num_initial_blocks  number of single blocks handled before the 8-block batch
##   T2                  passed to GHASH_MUL_AVX2 as its hash key operand
##   T3                  receives a copy of the running hash
##   CTR                 counter block, loaded from CurCount(arg2)
##   XMM1-XMM8           the eight batch blocks on exit when r13 >= 128
##   ENC_DEC             ENC or DEC; for DEC the input block is what gets hashed
##   VER                 not referenced in this macro body

.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
        # reg_i / reg_j are register aliases; setreg (defined elsewhere)
        # presumably rebinds them after i or j changes -- confirm
        i = (8-\num_initial_blocks)
        setreg
        vmovdqu AadHash(arg2), reg_i            # running hash so far

        # start AES for num_initial_blocks blocks
        vmovdqu CurCount(arg2), \CTR

        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
                vpaddd  ONE(%rip), \CTR, \CTR   # INCR Y0
                vmovdqa \CTR, reg_i
                vpshufb SHUF_MASK(%rip), reg_i, reg_i     # perform a 16Byte swap
        i = (i+1)
        setreg
.endr

        # round 0: xor every counter block with the first round key
        vmovdqa  (arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
                vpxor   \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        # rounds 1..REP, one round key at a time across all initial blocks
        j = 1
        setreg
.rep \REP
        vmovdqa  16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenc \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        j = (j+1)
        setreg
.endr


        # final round with the key that follows the REP middle-round keys
        vmovdqa  16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenclast      \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        # xor the keystream with the input and store the result; r11 is the
        # running byte offset into both buffers
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
                vmovdqu (arg4, %r11), \T1
                vpxor   \T1, reg_i, reg_i
                vmovdqu reg_i, (arg3 , %r11)           # write back ciphertext for
                                                       # num_initial_blocks blocks
                add     $16, %r11
.if  \ENC_DEC == DEC
                vmovdqa \T1, reg_i                     # when decrypting, hash the input block
.endif
                vpshufb SHUF_MASK(%rip), reg_i, reg_i  # prepare ciphertext for GHASH computations
        i = (i+1)
        setreg
.endr


        i = (8-\num_initial_blocks)
        j = (9-\num_initial_blocks)
        setreg

        # chain the hash through the blocks: next ^= previous, then multiply
.rep \num_initial_blocks
        vpxor    reg_i, reg_j, reg_j
        GHASH_MUL_AVX2       reg_j, \T2, \T1, \T3, \T4, \T5, \T6  # apply GHASH on num_initial_blocks blocks
        i = (i+1)
        j = (j+1)
        setreg
.endr
        # XMM8 has the combined result here

        vmovdqa  \XMM8, TMP1(%rsp)              # kept for the xor into XMM1 below
        vmovdqa  \XMM8, \T3

        # r13 presumably holds the remaining byte count -- confirm with caller
        cmp     $128, %r13
        jl      .L_initial_blocks_done\@                  # no need for precomputed constants

###############################################################################
# At least 128 bytes remain: build eight counter blocks, encrypt them and
# process the first full batch of eight data blocks.
                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM1
                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM2
                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM3
                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM4
                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM5
                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM6
                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM7
                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7  # perform a 16Byte swap

                vpaddd   ONE(%rip), \CTR, \CTR          # INCR Y0
                vmovdqa  \CTR, \XMM8
                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8  # perform a 16Byte swap

                # round 0 for all eight blocks
                vmovdqa  (arg1), \T_key
                vpxor    \T_key, \XMM1, \XMM1
                vpxor    \T_key, \XMM2, \XMM2
                vpxor    \T_key, \XMM3, \XMM3
                vpxor    \T_key, \XMM4, \XMM4
                vpxor    \T_key, \XMM5, \XMM5
                vpxor    \T_key, \XMM6, \XMM6
                vpxor    \T_key, \XMM7, \XMM7
                vpxor    \T_key, \XMM8, \XMM8

                i = 1
                setreg
.rep    \REP       # do REP rounds
                vmovdqa  16*i(arg1), \T_key
                vaesenc  \T_key, \XMM1, \XMM1
                vaesenc  \T_key, \XMM2, \XMM2
                vaesenc  \T_key, \XMM3, \XMM3
                vaesenc  \T_key, \XMM4, \XMM4
                vaesenc  \T_key, \XMM5, \XMM5
                vaesenc  \T_key, \XMM6, \XMM6
                vaesenc  \T_key, \XMM7, \XMM7
                vaesenc  \T_key, \XMM8, \XMM8
                i = (i+1)
                setreg
.endr


                # final round for all eight blocks
                vmovdqa  16*i(arg1), \T_key
                vaesenclast  \T_key, \XMM1, \XMM1
                vaesenclast  \T_key, \XMM2, \XMM2
                vaesenclast  \T_key, \XMM3, \XMM3
                vaesenclast  \T_key, \XMM4, \XMM4
                vaesenclast  \T_key, \XMM5, \XMM5
                vaesenclast  \T_key, \XMM6, \XMM6
                vaesenclast  \T_key, \XMM7, \XMM7
                vaesenclast  \T_key, \XMM8, \XMM8

                # per block: xor keystream with input, store the output, and
                # when decrypting keep the input block in XMMn for hashing
                vmovdqu  (arg4, %r11), \T1
                vpxor    \T1, \XMM1, \XMM1
                vmovdqu  \XMM1, (arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM1
                .endif

                vmovdqu  16*1(arg4, %r11), \T1
                vpxor    \T1, \XMM2, \XMM2
                vmovdqu  \XMM2, 16*1(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM2
                .endif

                vmovdqu  16*2(arg4, %r11), \T1
                vpxor    \T1, \XMM3, \XMM3
                vmovdqu  \XMM3, 16*2(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM3
                .endif

                vmovdqu  16*3(arg4, %r11), \T1
                vpxor    \T1, \XMM4, \XMM4
                vmovdqu  \XMM4, 16*3(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM4
                .endif

                vmovdqu  16*4(arg4, %r11), \T1
                vpxor    \T1, \XMM5, \XMM5
                vmovdqu  \XMM5, 16*4(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM5
                .endif

                vmovdqu  16*5(arg4, %r11), \T1
                vpxor    \T1, \XMM6, \XMM6
                vmovdqu  \XMM6, 16*5(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM6
                .endif

                vmovdqu  16*6(arg4, %r11), \T1
                vpxor    \T1, \XMM7, \XMM7
                vmovdqu  \XMM7, 16*6(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM7
                .endif

                vmovdqu  16*7(arg4, %r11), \T1
                vpxor    \T1, \XMM8, \XMM8
                vmovdqu  \XMM8, 16*7(arg3 , %r11)
                .if   \ENC_DEC == DEC
                vmovdqa  \T1, \XMM8
                .endif

                add     $128, %r11

                vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1     # perform a 16Byte swap
                vpxor    TMP1(%rsp), \XMM1, \XMM1          # combine GHASHed value with
                                                           # the corresponding ciphertext
                vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7     # perform a 16Byte swap
                vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8     # perform a 16Byte swap

###############################################################################

.L_initial_blocks_done\@:


.endm



# GHASH_8_ENCRYPT_8_PARALLEL_AVX2
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
# arg1, arg2, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value; it is read but not advanced in this macro
#
# On entry XMM1-XMM8 hold the eight blocks still to be hashed. On exit they
# hold the eight newly processed blocks, byte-swapped, with the updated hash
# already xored into XMM1.
#   REP        number of vaesenc rounds; the code below assumes REP >= 9
#   T1-T7      scratch; T4 / T7 / T6 accumulate the high / low / middle
#              carry-less products
#   loop_idx   in_order: step the counter with ONE and byte-swap afterwards;
#              otherwise step with ONEf and skip the swap (presumably the
#              counter is then kept in swapped form -- confirm with caller)
#   ENC_DEC    ENC or DEC
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC

        # free XMM1-XMM8 for the new counter blocks: block 1 stays in T2,
        # blocks 2-8 are spilled to the stack
        vmovdqa \XMM1, \T2
        vmovdqa \XMM2, TMP2(%rsp)
        vmovdqa \XMM3, TMP3(%rsp)
        vmovdqa \XMM4, TMP4(%rsp)
        vmovdqa \XMM5, TMP5(%rsp)
        vmovdqa \XMM6, TMP6(%rsp)
        vmovdqa \XMM7, TMP7(%rsp)
        vmovdqa \XMM8, TMP8(%rsp)

.if \loop_idx == in_order
                vpaddd  ONE(%rip), \CTR, \XMM1            # INCR CNT
                vpaddd  ONE(%rip), \XMM1, \XMM2
                vpaddd  ONE(%rip), \XMM2, \XMM3
                vpaddd  ONE(%rip), \XMM3, \XMM4
                vpaddd  ONE(%rip), \XMM4, \XMM5
                vpaddd  ONE(%rip), \XMM5, \XMM6
                vpaddd  ONE(%rip), \XMM6, \XMM7
                vpaddd  ONE(%rip), \XMM7, \XMM8
                vmovdqa \XMM8, \CTR

                vpshufb SHUF_MASK(%rip), \XMM1, \XMM1     # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM2, \XMM2     # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM3, \XMM3     # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM4, \XMM4     # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM5, \XMM5     # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM6, \XMM6     # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM7, \XMM7     # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM8, \XMM8     # perform a 16Byte swap
.else
                vpaddd  ONEf(%rip), \CTR, \XMM1            # INCR CNT
                vpaddd  ONEf(%rip), \XMM1, \XMM2
                vpaddd  ONEf(%rip), \XMM2, \XMM3
                vpaddd  ONEf(%rip), \XMM3, \XMM4
                vpaddd  ONEf(%rip), \XMM4, \XMM5
                vpaddd  ONEf(%rip), \XMM5, \XMM6
                vpaddd  ONEf(%rip), \XMM6, \XMM7
                vpaddd  ONEf(%rip), \XMM7, \XMM8
                vmovdqa \XMM8, \CTR
.endif


        #######################################################################

                # AES round 0
                vmovdqu (arg1), \T1
                vpxor   \T1, \XMM1, \XMM1
                vpxor   \T1, \XMM2, \XMM2
                vpxor   \T1, \XMM3, \XMM3
                vpxor   \T1, \XMM4, \XMM4
                vpxor   \T1, \XMM5, \XMM5
                vpxor   \T1, \XMM6, \XMM6
                vpxor   \T1, \XMM7, \XMM7
                vpxor   \T1, \XMM8, \XMM8

        #######################################################################





                # AES round 1
                vmovdqu 16*1(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8

                # AES round 2
                vmovdqu 16*2(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8


        #######################################################################

        # previous block 1 (in T2) times HashKey_8; starts the accumulators
        vmovdqu         HashKey_8(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T2, \T4              # T4 = a1*b1
        vpclmulqdq      $0x00, \T5, \T2, \T7              # T7 = a0*b0
        vpclmulqdq      $0x01, \T5, \T2, \T6              # T6 = a1*b0
        vpclmulqdq      $0x10, \T5, \T2, \T5              # T5 = a0*b1
        vpxor           \T5, \T6, \T6

                # AES round 3
                vmovdqu 16*3(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8

        # previous block 2 times HashKey_7
        vmovdqa         TMP2(%rsp), \T1
        vmovdqu         HashKey_7(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4

        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpclmulqdq      $0x01, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x10, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

                # AES round 4
                vmovdqu 16*4(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8

        #######################################################################

        # previous block 3 times HashKey_6
        vmovdqa         TMP3(%rsp), \T1
        vmovdqu         HashKey_6(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4

        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpclmulqdq      $0x01, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x10, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

                # AES round 5
                vmovdqu 16*5(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8

        # previous block 4 times HashKey_5
        vmovdqa         TMP4(%rsp), \T1
        vmovdqu         HashKey_5(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4

        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpclmulqdq      $0x01, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x10, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

                # AES round 6
                vmovdqu 16*6(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8


        # previous block 5 times HashKey_4
        vmovdqa         TMP5(%rsp), \T1
        vmovdqu         HashKey_4(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4

        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpclmulqdq      $0x01, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x10, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

                # AES round 7
                vmovdqu 16*7(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8

        # previous block 6 times HashKey_3
        vmovdqa         TMP6(%rsp), \T1
        vmovdqu         HashKey_3(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4

        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpclmulqdq      $0x01, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x10, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

                # AES round 8
                vmovdqu 16*8(arg1), \T1
                vaesenc \T1, \XMM1, \XMM1
                vaesenc \T1, \XMM2, \XMM2
                vaesenc \T1, \XMM3, \XMM3
                vaesenc \T1, \XMM4, \XMM4
                vaesenc \T1, \XMM5, \XMM5
                vaesenc \T1, \XMM6, \XMM6
                vaesenc \T1, \XMM7, \XMM7
                vaesenc \T1, \XMM8, \XMM8

        # previous block 7 times HashKey_2
        vmovdqa         TMP7(%rsp), \T1
        vmovdqu         HashKey_2(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T4

        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpclmulqdq      $0x01, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x10, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6


        #######################################################################

                # AES round 9
                vmovdqu 16*9(arg1), \T5
                vaesenc \T5, \XMM1, \XMM1
                vaesenc \T5, \XMM2, \XMM2
                vaesenc \T5, \XMM3, \XMM3
                vaesenc \T5, \XMM4, \XMM4
                vaesenc \T5, \XMM5, \XMM5
                vaesenc \T5, \XMM6, \XMM6
                vaesenc \T5, \XMM7, \XMM7
                vaesenc \T5, \XMM8, \XMM8

        # previous block 8 times HashKey; the high-product total lands in T1
        vmovdqa         TMP8(%rsp), \T1
        vmovdqu         HashKey(arg2), \T5

        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor           \T3, \T7, \T7

        vpclmulqdq      $0x01, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x10, \T5, \T1, \T3
        vpxor           \T3, \T6, \T6

        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor           \T3, \T4, \T1


                vmovdqu 16*10(arg1), \T5

        # rounds 10..REP when REP exceeds 9; each pass applies the key held
        # in T5 and then loads the next one, so T5 ends as the last round key
        i = 11
        setreg
.rep (\REP-9)
        vaesenc \T5, \XMM1, \XMM1
        vaesenc \T5, \XMM2, \XMM2
        vaesenc \T5, \XMM3, \XMM3
        vaesenc \T5, \XMM4, \XMM4
        vaesenc \T5, \XMM5, \XMM5
        vaesenc \T5, \XMM6, \XMM6
        vaesenc \T5, \XMM7, \XMM7
        vaesenc \T5, \XMM8, \XMM8

        vmovdqu 16*i(arg1), \T5
        i = i + 1
        setreg
.endr

        # Final round. vaesenclast finishes by xoring its key operand, so
        # folding the input block into the last round key first produces
        # keystream ^ input in one step.
        i = 0
        j = 1
        setreg
.rep 8
                vpxor   16*i(arg4, %r11), \T5, \T2
                .if \ENC_DEC == ENC
                vaesenclast     \T2, reg_j, reg_j
                .else
                # decrypt: store the output now and keep the input block in
                # reg_j for the next round of hashing
                vaesenclast     \T2, reg_j, \T3
                vmovdqu 16*i(arg4, %r11), reg_j
                vmovdqu \T3, 16*i(arg3, %r11)
                .endif
        i = (i+1)
        j = (j+1)
        setreg
.endr
        #######################################################################


        vpslldq $8, \T6, \T3                            # T3 = T6 shifted left 2 DWs
        vpsrldq $8, \T6, \T6                            # shift-R T6 2 DWs
        vpxor   \T3, \T7, \T7
        vpxor   \T6, \T1, \T1                           # accumulate the results in T1:T7



        #######################################################################
        #first phase of the reduction
        vmovdqa         POLY2(%rip), \T3

        vpclmulqdq      $0x01, \T7, \T3, \T2
        vpslldq         $8, \T2, \T2                    # shift-L T2 2 DWs

        vpxor           \T2, \T7, \T7                   # first phase of the reduction complete
        #######################################################################
                .if \ENC_DEC == ENC
                vmovdqu  \XMM1, 16*0(arg3,%r11)         # Write to the Ciphertext buffer
                vmovdqu  \XMM2, 16*1(arg3,%r11)         # Write to the Ciphertext buffer
                vmovdqu  \XMM3, 16*2(arg3,%r11)         # Write to the Ciphertext buffer
                vmovdqu  \XMM4, 16*3(arg3,%r11)         # Write to the Ciphertext buffer
                vmovdqu  \XMM5, 16*4(arg3,%r11)         # Write to the Ciphertext buffer
                vmovdqu  \XMM6, 16*5(arg3,%r11)         # Write to the Ciphertext buffer
                vmovdqu  \XMM7, 16*6(arg3,%r11)         # Write to the Ciphertext buffer
                vmovdqu  \XMM8, 16*7(arg3,%r11)         # Write to the Ciphertext buffer
                .endif

        #######################################################################
        #second phase of the reduction
        vpclmulqdq      $0x00, \T7, \T3, \T2
        vpsrldq         $4, \T2, \T2                    # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

        vpclmulqdq      $0x10, \T7, \T3, \T4
        vpslldq         $4, \T4, \T4                    # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)

        vpxor           \T2, \T4, \T4                   # second phase of the reduction complete
        #######################################################################
        vpxor           \T4, \T1, \T1                   # the result is in T1

                vpshufb SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
                vpshufb SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap


        vpxor   \T1, \XMM1, \XMM1                       # fold the updated hash into new block 1



.endm


# GHASH the last 4 ciphertext blocks.
###############################################################################
# GHASH_LAST_8_AVX2: fold eight blocks (XMM1..XMM8) into one GHASH value.
#
# Each block XMMn is carry-less multiplied by a key entry loaded relative to
# arg2: XMM1 pairs with HashKey_8, XMM2 with HashKey_7, ... XMM8 with HashKey.
# (Presumably these entries are the precomputed powers of the hash key set up
# elsewhere in this file - confirm against the precompute macro.)
#
# The multiplication uses the Karatsuba split, with three running sums:
#   T6   - high products   (upper qword x upper qword, selector 0x11)
#   T7   - low products    (lower qword x lower qword, selector 0x00)
#   XMM1 - middle products ((upper ^ lower) x (upper ^ lower))
# The sums are recombined into the 256-bit value <T6:T7> and then reduced in
# two phases using the POLY2 constant.
#
# Output:   T6 holds the reduced result.
# Clobbers: T2, T3, T4, T5, T7 and XMM1.  XMM2..XMM8 are only read.
# Note:     T1 is part of the argument list but is not referenced in the body.
###############################################################################
.macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8

	## Karatsuba Method

	# Block 1: initialises the three accumulators (no xor needed yet).
	vmovdqu		HashKey_8(arg2), \T5

	vpshufd		$0b01001110, \XMM1, \T2		# swap the 64-bit halves of the block
	vpshufd		$0b01001110, \T5, \T3		# swap the 64-bit halves of the key
	vpxor		\XMM1, \T2, \T2			# T2 = upper ^ lower of the block
	vpxor		\T5, \T3, \T3			# T3 = upper ^ lower of the key

	vpclmulqdq	$0x11, \T5, \XMM1, \T6		# high product starts T6
	vpclmulqdq	$0x00, \T5, \XMM1, \T7		# low product starts T7

	vpclmulqdq	$0x00, \T3, \T2, \XMM1		# middle product starts XMM1

	######################
	# Block 2: same three products, accumulated by xor (T4 is scratch).

	vmovdqu		HashKey_7(arg2), \T5
	vpshufd		$0b01001110, \XMM2, \T2
	vpshufd		$0b01001110, \T5, \T3
	vpxor		\XMM2, \T2, \T2
	vpxor		\T5, \T3, \T3

	vpclmulqdq	$0x11, \T5, \XMM2, \T4
	vpxor		\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM2, \T4
	vpxor		\T4, \T7, \T7

	vpclmulqdq	$0x00, \T3, \T2, \T2

	vpxor		\T2, \XMM1, \XMM1

	######################
	# Block 3

	vmovdqu		HashKey_6(arg2), \T5
	vpshufd		$0b01001110, \XMM3, \T2
	vpshufd		$0b01001110, \T5, \T3
	vpxor		\XMM3, \T2, \T2
	vpxor		\T5, \T3, \T3

	vpclmulqdq	$0x11, \T5, \XMM3, \T4
	vpxor		\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM3, \T4
	vpxor		\T4, \T7, \T7

	vpclmulqdq	$0x00, \T3, \T2, \T2

	vpxor		\T2, \XMM1, \XMM1

	######################
	# Block 4

	vmovdqu		HashKey_5(arg2), \T5
	vpshufd		$0b01001110, \XMM4, \T2
	vpshufd		$0b01001110, \T5, \T3
	vpxor		\XMM4, \T2, \T2
	vpxor		\T5, \T3, \T3

	vpclmulqdq	$0x11, \T5, \XMM4, \T4
	vpxor		\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM4, \T4
	vpxor		\T4, \T7, \T7

	vpclmulqdq	$0x00, \T3, \T2, \T2

	vpxor		\T2, \XMM1, \XMM1

	######################
	# Block 5

	vmovdqu		HashKey_4(arg2), \T5
	vpshufd		$0b01001110, \XMM5, \T2
	vpshufd		$0b01001110, \T5, \T3
	vpxor		\XMM5, \T2, \T2
	vpxor		\T5, \T3, \T3

	vpclmulqdq	$0x11, \T5, \XMM5, \T4
	vpxor		\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM5, \T4
	vpxor		\T4, \T7, \T7

	vpclmulqdq	$0x00, \T3, \T2, \T2

	vpxor		\T2, \XMM1, \XMM1

	######################
	# Block 6

	vmovdqu		HashKey_3(arg2), \T5
	vpshufd		$0b01001110, \XMM6, \T2
	vpshufd		$0b01001110, \T5, \T3
	vpxor		\XMM6, \T2, \T2
	vpxor		\T5, \T3, \T3

	vpclmulqdq	$0x11, \T5, \XMM6, \T4
	vpxor		\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM6, \T4
	vpxor		\T4, \T7, \T7

	vpclmulqdq	$0x00, \T3, \T2, \T2

	vpxor		\T2, \XMM1, \XMM1

	######################
	# Block 7

	vmovdqu		HashKey_2(arg2), \T5
	vpshufd		$0b01001110, \XMM7, \T2
	vpshufd		$0b01001110, \T5, \T3
	vpxor		\XMM7, \T2, \T2
	vpxor		\T5, \T3, \T3

	vpclmulqdq	$0x11, \T5, \XMM7, \T4
	vpxor		\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM7, \T4
	vpxor		\T4, \T7, \T7

	vpclmulqdq	$0x00, \T3, \T2, \T2

	vpxor		\T2, \XMM1, \XMM1

	######################
	# Block 8

	vmovdqu		HashKey(arg2), \T5
	vpshufd		$0b01001110, \XMM8, \T2
	vpshufd		$0b01001110, \T5, \T3
	vpxor		\XMM8, \T2, \T2
	vpxor		\T5, \T3, \T3

	vpclmulqdq	$0x11, \T5, \XMM8, \T4
	vpxor		\T4, \T6, \T6

	vpclmulqdq	$0x00, \T5, \XMM8, \T4
	vpxor		\T4, \T7, \T7

	vpclmulqdq	$0x00, \T3, \T2, \T2

	vpxor		\T2, \XMM1, \XMM1

	# Karatsuba recombination: middle term = middle sum ^ high sum ^ low sum.
	vpxor		\T6, \XMM1, \XMM1
	vpxor		\T7, \XMM1, \T2

	# Split the middle term across the two halves of the 256-bit product.
	vpslldq		$8, \T2, \T4			# lower qword of the middle term, moved up
	vpsrldq		$8, \T2, \T2			# upper qword of the middle term, moved down

	vpxor		\T4, \T7, \T7
	vpxor		\T2, \T6, \T6			# <T6:T7> holds the result of the
							# accumulated carry-less multiplications

	#######################################################################
	#first phase of the reduction
	vmovdqa		POLY2(%rip), \T3

	vpclmulqdq	$0x01, \T7, \T3, \T2
	vpslldq		$8, \T2, \T2			# shift-L T2 2 DWs

	vpxor		\T2, \T7, \T7			# first phase of the reduction complete
	#######################################################################


	#second phase of the reduction
	vpclmulqdq	$0x00, \T7, \T3, \T2
	vpsrldq		$4, \T2, \T2			# shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

	vpclmulqdq	$0x10, \T7, \T3, \T4
	vpslldq		$4, \T4, \T4			# shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)

	vpxor		\T2, \T4, \T4			# second phase of the reduction complete
	#######################################################################
	vpxor		\T4, \T6, \T6			# the result is in T6
.endm



#############################################################
#void   aesni_gcm_init_avx_gen4
#        (gcm_data     *my_ctx_data,
#         gcm_context_data *data,
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#			(from Security Association) concatenated with 8 byte
#			Initialisation Vector (from IPSec ESP Payload)
#			concatenated with 0x00000001. 16-byte aligned pointer. */
#        u8     *hash_subkey, /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
#        u64     aad_len) /* Length of AAD in bytes.
#			With RFC4106 this is going to be 8 or 12 Bytes */
#############################################################
# Entry point for GCM initialisation on the AVX2 code path.
# The body is the INIT macro instantiated with the AVX2 GHASH multiply
# (GHASH_MUL_AVX2) and the AVX2 key precompute (PRECOMPUTE_AVX2) macros,
# bracketed by FUNC_SAVE / FUNC_RESTORE (defined elsewhere in this file;
# presumably they save and restore the callee-saved state).
SYM_FUNC_START(aesni_gcm_init_avx_gen4)
	FUNC_SAVE
	INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_init_avx_gen4)

###############################################################################
#void   aesni_gcm_enc_update_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
#        const   u8 *in, /* Plaintext input */
#        u64     plaintext_len) /* Length of data in Bytes for encryption.
#			*/
###############################################################################
# Encrypt-update entry point for the AVX2 code path.
# The key size is read into eax and selects one of three instantiations of
# GCM_ENC_DEC, which differ only in the last macro argument:
#   32 -> key_256_enc_update4 (13),  16 -> key_128_enc_update4 (9),
#   anything else falls through to the 192-bit variant (11).
# (The last argument is presumably the AES round count minus one - confirm
# against the GCM_ENC_DEC definition.)
# Each variant carries its own FUNC_RESTORE and return.
SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4)
	FUNC_SAVE
	mov	keysize,%eax
	cmp	$32, %eax
	je	key_256_enc_update4
	cmp	$16, %eax
	je	key_128_enc_update4
	# must be 192
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
	FUNC_RESTORE
	RET
key_128_enc_update4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
	FUNC_RESTORE
	RET
key_256_enc_update4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)

###############################################################################
#void   aesni_gcm_dec_update_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
#        const   u8 *in, /* Ciphertext input */
#        u64     plaintext_len) /* Length of data in Bytes for decryption.
#			*/
###############################################################################
# Decrypt-update entry point for the AVX2 code path.
# The key size is read into eax and selects one of three instantiations of
# GCM_ENC_DEC in DEC mode, which differ only in the last macro argument:
#   32 -> key_256_dec_update4 (13),  16 -> key_128_dec_update4 (9),
#   anything else falls through to the 192-bit variant (11).
# (The last argument is presumably the AES round count minus one - confirm
# against the GCM_ENC_DEC definition.)
# Each variant carries its own FUNC_RESTORE and return.
SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4)
	FUNC_SAVE
	mov	keysize,%eax
	cmp	$32, %eax
	je	key_256_dec_update4
	cmp	$16, %eax
	je	key_128_dec_update4
	# must be 192
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
	FUNC_RESTORE
	RET
key_128_dec_update4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
	FUNC_RESTORE
	RET
key_256_dec_update4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)

###############################################################################
#void   aesni_gcm_finalize_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) /* Authenticated Tag Length in bytes.
#                              Valid values are 16 (most likely), 12 or 8.
#			*/
###############################################################################
# Finalize entry point for the AVX2 code path.
# The key size is read into eax and selects one of three instantiations of
# GCM_COMPLETE, which differ only in the second macro argument:
#   32 -> key_256_finalize4 (13),  16 -> key_128_finalize4 (9),
#   anything else falls through to the 192-bit variant (11).
# arg3 and arg4 are forwarded to GCM_COMPLETE unchanged; per the prototype
# comment they are the tag pointer and the tag length.
# Each variant carries its own FUNC_RESTORE and return.
SYM_FUNC_START(aesni_gcm_finalize_avx_gen4)
	FUNC_SAVE
	mov	keysize,%eax
	cmp	$32, %eax
	je	key_256_finalize4
	cmp	$16, %eax
	je	key_128_finalize4
	# must be 192
	GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
	FUNC_RESTORE
	RET
key_128_finalize4:
	GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
	FUNC_RESTORE
	RET
key_256_finalize4:
	GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)