18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */ 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Implement AES algorithm in Intel AES-NI instructions. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * The white paper of AES-NI instructions can be downloaded from: 68c2ecf20Sopenharmony_ci * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Copyright (C) 2008, Intel Corp. 98c2ecf20Sopenharmony_ci * Author: Huang Ying <ying.huang@intel.com> 108c2ecf20Sopenharmony_ci * Vinodh Gopal <vinodh.gopal@intel.com> 118c2ecf20Sopenharmony_ci * Kahraman Akdemir 128c2ecf20Sopenharmony_ci * 138c2ecf20Sopenharmony_ci * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD 148c2ecf20Sopenharmony_ci * interface for 64-bit kernels. 158c2ecf20Sopenharmony_ci * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com) 168c2ecf20Sopenharmony_ci * Aidan O'Mahony (aidan.o.mahony@intel.com) 178c2ecf20Sopenharmony_ci * Adrian Hoban <adrian.hoban@intel.com> 188c2ecf20Sopenharmony_ci * James Guilford (james.guilford@intel.com) 198c2ecf20Sopenharmony_ci * Gabriele Paoloni <gabriele.paoloni@intel.com> 208c2ecf20Sopenharmony_ci * Tadeusz Struk (tadeusz.struk@intel.com) 218c2ecf20Sopenharmony_ci * Wajdi Feghali (wajdi.k.feghali@intel.com) 228c2ecf20Sopenharmony_ci * Copyright (c) 2010, Intel Corporation. 238c2ecf20Sopenharmony_ci * 248c2ecf20Sopenharmony_ci * Ported x86_64 version to x86: 258c2ecf20Sopenharmony_ci * Author: Mathias Krause <minipli@googlemail.com> 268c2ecf20Sopenharmony_ci */ 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci#include <linux/linkage.h> 298c2ecf20Sopenharmony_ci#include <asm/frame.h> 308c2ecf20Sopenharmony_ci#include <asm/nospec-branch.h> 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci/* 338c2ecf20Sopenharmony_ci * The following macros are used to move an (un)aligned 16 byte value to/from 348c2ecf20Sopenharmony_ci * an XMM register. This can done for either FP or integer values, for FP use 358c2ecf20Sopenharmony_ci * movaps (move aligned packed single) or integer use movdqa (move double quad 368c2ecf20Sopenharmony_ci * aligned). It doesn't make a performance difference which instruction is used 378c2ecf20Sopenharmony_ci * since Nehalem (original Core i7) was released. However, the movaps is a byte 388c2ecf20Sopenharmony_ci * shorter, so that is the one we'll use for now. (same for unaligned). 398c2ecf20Sopenharmony_ci */ 408c2ecf20Sopenharmony_ci#define MOVADQ movaps 418c2ecf20Sopenharmony_ci#define MOVUDQ movups 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci#ifdef __x86_64__ 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci# constants in mergeable sections, linker can reorder and merge 468c2ecf20Sopenharmony_ci.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 478c2ecf20Sopenharmony_ci.align 16 488c2ecf20Sopenharmony_ci.Lgf128mul_x_ble_mask: 498c2ecf20Sopenharmony_ci .octa 0x00000000000000010000000000000087 508c2ecf20Sopenharmony_ci.section .rodata.cst16.POLY, "aM", @progbits, 16 518c2ecf20Sopenharmony_ci.align 16 528c2ecf20Sopenharmony_ciPOLY: .octa 0xC2000000000000000000000000000001 538c2ecf20Sopenharmony_ci.section .rodata.cst16.TWOONE, "aM", @progbits, 16 548c2ecf20Sopenharmony_ci.align 16 558c2ecf20Sopenharmony_ciTWOONE: .octa 0x00000001000000000000000000000001 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 588c2ecf20Sopenharmony_ci.align 16 598c2ecf20Sopenharmony_ciSHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F 608c2ecf20Sopenharmony_ci.section .rodata.cst16.MASK1, "aM", @progbits, 16 618c2ecf20Sopenharmony_ci.align 16 628c2ecf20Sopenharmony_ciMASK1: .octa 0x0000000000000000ffffffffffffffff 638c2ecf20Sopenharmony_ci.section .rodata.cst16.MASK2, "aM", @progbits, 16 648c2ecf20Sopenharmony_ci.align 16 658c2ecf20Sopenharmony_ciMASK2: .octa 0xffffffffffffffff0000000000000000 668c2ecf20Sopenharmony_ci.section .rodata.cst16.ONE, "aM", @progbits, 16 678c2ecf20Sopenharmony_ci.align 16 688c2ecf20Sopenharmony_ciONE: .octa 0x00000000000000000000000000000001 698c2ecf20Sopenharmony_ci.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16 708c2ecf20Sopenharmony_ci.align 16 718c2ecf20Sopenharmony_ciF_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 728c2ecf20Sopenharmony_ci.section .rodata.cst16.dec, "aM", @progbits, 16 738c2ecf20Sopenharmony_ci.align 16 748c2ecf20Sopenharmony_cidec: .octa 0x1 758c2ecf20Sopenharmony_ci.section .rodata.cst16.enc, "aM", @progbits, 16 768c2ecf20Sopenharmony_ci.align 16 778c2ecf20Sopenharmony_cienc: .octa 0x2 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci# order of these constants should not change. 808c2ecf20Sopenharmony_ci# more specifically, ALL_F should follow SHIFT_MASK, 818c2ecf20Sopenharmony_ci# and zero should follow ALL_F 828c2ecf20Sopenharmony_ci.section .rodata, "a", @progbits 838c2ecf20Sopenharmony_ci.align 16 848c2ecf20Sopenharmony_ciSHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 858c2ecf20Sopenharmony_ciALL_F: .octa 0xffffffffffffffffffffffffffffffff 868c2ecf20Sopenharmony_ci .octa 0x00000000000000000000000000000000 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci.text 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci#define STACK_OFFSET 8*3 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci#define AadHash 16*0 948c2ecf20Sopenharmony_ci#define AadLen 16*1 958c2ecf20Sopenharmony_ci#define InLen (16*1)+8 968c2ecf20Sopenharmony_ci#define PBlockEncKey 16*2 978c2ecf20Sopenharmony_ci#define OrigIV 16*3 988c2ecf20Sopenharmony_ci#define CurCount 16*4 998c2ecf20Sopenharmony_ci#define PBlockLen 16*5 1008c2ecf20Sopenharmony_ci#define HashKey 16*6 // store HashKey <<1 mod poly here 1018c2ecf20Sopenharmony_ci#define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here 1028c2ecf20Sopenharmony_ci#define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here 1038c2ecf20Sopenharmony_ci#define HashKey_4 16*9 // store HashKey^4 <<1 mod poly here 1048c2ecf20Sopenharmony_ci#define HashKey_k 16*10 // store XOR of High 64 bits and Low 64 1058c2ecf20Sopenharmony_ci // bits of HashKey <<1 mod poly here 1068c2ecf20Sopenharmony_ci //(for Karatsuba purposes) 1078c2ecf20Sopenharmony_ci#define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64 1088c2ecf20Sopenharmony_ci // bits of HashKey^2 <<1 mod poly here 1098c2ecf20Sopenharmony_ci // (for Karatsuba purposes) 1108c2ecf20Sopenharmony_ci#define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64 1118c2ecf20Sopenharmony_ci // bits of HashKey^3 <<1 mod poly here 1128c2ecf20Sopenharmony_ci // (for Karatsuba purposes) 1138c2ecf20Sopenharmony_ci#define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64 1148c2ecf20Sopenharmony_ci // bits of HashKey^4 <<1 mod poly here 1158c2ecf20Sopenharmony_ci // (for Karatsuba purposes) 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci#define arg1 rdi 1188c2ecf20Sopenharmony_ci#define arg2 rsi 1198c2ecf20Sopenharmony_ci#define arg3 rdx 1208c2ecf20Sopenharmony_ci#define arg4 rcx 1218c2ecf20Sopenharmony_ci#define arg5 r8 1228c2ecf20Sopenharmony_ci#define arg6 r9 1238c2ecf20Sopenharmony_ci#define arg7 STACK_OFFSET+8(%rsp) 1248c2ecf20Sopenharmony_ci#define arg8 STACK_OFFSET+16(%rsp) 1258c2ecf20Sopenharmony_ci#define arg9 STACK_OFFSET+24(%rsp) 1268c2ecf20Sopenharmony_ci#define arg10 STACK_OFFSET+32(%rsp) 1278c2ecf20Sopenharmony_ci#define arg11 STACK_OFFSET+40(%rsp) 1288c2ecf20Sopenharmony_ci#define keysize 2*15*16(%arg1) 1298c2ecf20Sopenharmony_ci#endif 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci#define STATE1 %xmm0 1338c2ecf20Sopenharmony_ci#define STATE2 %xmm4 1348c2ecf20Sopenharmony_ci#define STATE3 %xmm5 1358c2ecf20Sopenharmony_ci#define STATE4 %xmm6 1368c2ecf20Sopenharmony_ci#define STATE STATE1 1378c2ecf20Sopenharmony_ci#define IN1 %xmm1 1388c2ecf20Sopenharmony_ci#define IN2 %xmm7 1398c2ecf20Sopenharmony_ci#define IN3 %xmm8 1408c2ecf20Sopenharmony_ci#define IN4 %xmm9 1418c2ecf20Sopenharmony_ci#define IN IN1 1428c2ecf20Sopenharmony_ci#define KEY %xmm2 1438c2ecf20Sopenharmony_ci#define IV %xmm3 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci#define BSWAP_MASK %xmm10 1468c2ecf20Sopenharmony_ci#define CTR %xmm11 1478c2ecf20Sopenharmony_ci#define INC %xmm12 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci#define GF128MUL_MASK %xmm10 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci#ifdef __x86_64__ 1528c2ecf20Sopenharmony_ci#define AREG %rax 1538c2ecf20Sopenharmony_ci#define KEYP %rdi 1548c2ecf20Sopenharmony_ci#define OUTP %rsi 1558c2ecf20Sopenharmony_ci#define UKEYP OUTP 1568c2ecf20Sopenharmony_ci#define INP %rdx 1578c2ecf20Sopenharmony_ci#define LEN %rcx 1588c2ecf20Sopenharmony_ci#define IVP %r8 1598c2ecf20Sopenharmony_ci#define KLEN %r9d 1608c2ecf20Sopenharmony_ci#define T1 %r10 1618c2ecf20Sopenharmony_ci#define TKEYP T1 1628c2ecf20Sopenharmony_ci#define T2 %r11 1638c2ecf20Sopenharmony_ci#define TCTR_LOW T2 1648c2ecf20Sopenharmony_ci#else 1658c2ecf20Sopenharmony_ci#define AREG %eax 1668c2ecf20Sopenharmony_ci#define KEYP %edi 1678c2ecf20Sopenharmony_ci#define OUTP AREG 1688c2ecf20Sopenharmony_ci#define UKEYP OUTP 1698c2ecf20Sopenharmony_ci#define INP %edx 1708c2ecf20Sopenharmony_ci#define LEN %esi 1718c2ecf20Sopenharmony_ci#define IVP %ebp 1728c2ecf20Sopenharmony_ci#define KLEN %ebx 1738c2ecf20Sopenharmony_ci#define T1 %ecx 1748c2ecf20Sopenharmony_ci#define TKEYP T1 1758c2ecf20Sopenharmony_ci#endif 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci.macro FUNC_SAVE 1788c2ecf20Sopenharmony_ci push %r12 1798c2ecf20Sopenharmony_ci push %r13 1808c2ecf20Sopenharmony_ci push %r14 1818c2ecf20Sopenharmony_ci# 1828c2ecf20Sopenharmony_ci# states of %xmm registers %xmm6:%xmm15 not saved 1838c2ecf20Sopenharmony_ci# all %xmm registers are clobbered 1848c2ecf20Sopenharmony_ci# 1858c2ecf20Sopenharmony_ci.endm 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci.macro FUNC_RESTORE 1898c2ecf20Sopenharmony_ci pop %r14 1908c2ecf20Sopenharmony_ci pop %r13 1918c2ecf20Sopenharmony_ci pop %r12 1928c2ecf20Sopenharmony_ci.endm 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci# Precompute hashkeys. 1958c2ecf20Sopenharmony_ci# Input: Hash subkey. 1968c2ecf20Sopenharmony_ci# Output: HashKeys stored in gcm_context_data. Only needs to be called 1978c2ecf20Sopenharmony_ci# once per key. 1988c2ecf20Sopenharmony_ci# clobbers r12, and tmp xmm registers. 1998c2ecf20Sopenharmony_ci.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 2008c2ecf20Sopenharmony_ci mov \SUBKEY, %r12 2018c2ecf20Sopenharmony_ci movdqu (%r12), \TMP3 2028c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), \TMP2 2038c2ecf20Sopenharmony_ci pshufb \TMP2, \TMP3 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci movdqa \TMP3, \TMP2 2088c2ecf20Sopenharmony_ci psllq $1, \TMP3 2098c2ecf20Sopenharmony_ci psrlq $63, \TMP2 2108c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP1 2118c2ecf20Sopenharmony_ci pslldq $8, \TMP2 2128c2ecf20Sopenharmony_ci psrldq $8, \TMP1 2138c2ecf20Sopenharmony_ci por \TMP2, \TMP3 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci # reduce HashKey<<1 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci pshufd $0x24, \TMP1, \TMP2 2188c2ecf20Sopenharmony_ci pcmpeqd TWOONE(%rip), \TMP2 2198c2ecf20Sopenharmony_ci pand POLY(%rip), \TMP2 2208c2ecf20Sopenharmony_ci pxor \TMP2, \TMP3 2218c2ecf20Sopenharmony_ci movdqu \TMP3, HashKey(%arg2) 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci movdqa \TMP3, \TMP5 2248c2ecf20Sopenharmony_ci pshufd $78, \TMP3, \TMP1 2258c2ecf20Sopenharmony_ci pxor \TMP3, \TMP1 2268c2ecf20Sopenharmony_ci movdqu \TMP1, HashKey_k(%arg2) 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 2298c2ecf20Sopenharmony_ci# TMP5 = HashKey^2<<1 (mod poly) 2308c2ecf20Sopenharmony_ci movdqu \TMP5, HashKey_2(%arg2) 2318c2ecf20Sopenharmony_ci# HashKey_2 = HashKey^2<<1 (mod poly) 2328c2ecf20Sopenharmony_ci pshufd $78, \TMP5, \TMP1 2338c2ecf20Sopenharmony_ci pxor \TMP5, \TMP1 2348c2ecf20Sopenharmony_ci movdqu \TMP1, HashKey_2_k(%arg2) 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 2378c2ecf20Sopenharmony_ci# TMP5 = HashKey^3<<1 (mod poly) 2388c2ecf20Sopenharmony_ci movdqu \TMP5, HashKey_3(%arg2) 2398c2ecf20Sopenharmony_ci pshufd $78, \TMP5, \TMP1 2408c2ecf20Sopenharmony_ci pxor \TMP5, \TMP1 2418c2ecf20Sopenharmony_ci movdqu \TMP1, HashKey_3_k(%arg2) 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 2448c2ecf20Sopenharmony_ci# TMP5 = HashKey^3<<1 (mod poly) 2458c2ecf20Sopenharmony_ci movdqu \TMP5, HashKey_4(%arg2) 2468c2ecf20Sopenharmony_ci pshufd $78, \TMP5, \TMP1 2478c2ecf20Sopenharmony_ci pxor \TMP5, \TMP1 2488c2ecf20Sopenharmony_ci movdqu \TMP1, HashKey_4_k(%arg2) 2498c2ecf20Sopenharmony_ci.endm 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. 2528c2ecf20Sopenharmony_ci# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 2538c2ecf20Sopenharmony_ci.macro GCM_INIT Iv SUBKEY AAD AADLEN 2548c2ecf20Sopenharmony_ci mov \AADLEN, %r11 2558c2ecf20Sopenharmony_ci mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length 2568c2ecf20Sopenharmony_ci xor %r11d, %r11d 2578c2ecf20Sopenharmony_ci mov %r11, InLen(%arg2) # ctx_data.in_length = 0 2588c2ecf20Sopenharmony_ci mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 2598c2ecf20Sopenharmony_ci mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 2608c2ecf20Sopenharmony_ci mov \Iv, %rax 2618c2ecf20Sopenharmony_ci movdqu (%rax), %xmm0 2628c2ecf20Sopenharmony_ci movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm2 2658c2ecf20Sopenharmony_ci pshufb %xmm2, %xmm0 2668c2ecf20Sopenharmony_ci movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7 2698c2ecf20Sopenharmony_ci movdqu HashKey(%arg2), %xmm13 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ 2728c2ecf20Sopenharmony_ci %xmm4, %xmm5, %xmm6 2738c2ecf20Sopenharmony_ci.endm 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context 2768c2ecf20Sopenharmony_ci# struct has been initialized by GCM_INIT. 2778c2ecf20Sopenharmony_ci# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK 2788c2ecf20Sopenharmony_ci# Clobbers rax, r10-r13, and xmm0-xmm15 2798c2ecf20Sopenharmony_ci.macro GCM_ENC_DEC operation 2808c2ecf20Sopenharmony_ci movdqu AadHash(%arg2), %xmm8 2818c2ecf20Sopenharmony_ci movdqu HashKey(%arg2), %xmm13 2828c2ecf20Sopenharmony_ci add %arg5, InLen(%arg2) 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci xor %r11d, %r11d # initialise the data pointer offset as zero 2858c2ecf20Sopenharmony_ci PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci sub %r11, %arg5 # sub partial block data used 2888c2ecf20Sopenharmony_ci mov %arg5, %r13 # save the number of bytes 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) 2918c2ecf20Sopenharmony_ci mov %r13, %r12 2928c2ecf20Sopenharmony_ci # Encrypt/Decrypt first few blocks 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci and $(3<<4), %r12 2958c2ecf20Sopenharmony_ci jz _initial_num_blocks_is_0_\@ 2968c2ecf20Sopenharmony_ci cmp $(2<<4), %r12 2978c2ecf20Sopenharmony_ci jb _initial_num_blocks_is_1_\@ 2988c2ecf20Sopenharmony_ci je _initial_num_blocks_is_2_\@ 2998c2ecf20Sopenharmony_ci_initial_num_blocks_is_3_\@: 3008c2ecf20Sopenharmony_ci INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ 3018c2ecf20Sopenharmony_ci%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation 3028c2ecf20Sopenharmony_ci sub $48, %r13 3038c2ecf20Sopenharmony_ci jmp _initial_blocks_\@ 3048c2ecf20Sopenharmony_ci_initial_num_blocks_is_2_\@: 3058c2ecf20Sopenharmony_ci INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ 3068c2ecf20Sopenharmony_ci%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation 3078c2ecf20Sopenharmony_ci sub $32, %r13 3088c2ecf20Sopenharmony_ci jmp _initial_blocks_\@ 3098c2ecf20Sopenharmony_ci_initial_num_blocks_is_1_\@: 3108c2ecf20Sopenharmony_ci INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ 3118c2ecf20Sopenharmony_ci%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation 3128c2ecf20Sopenharmony_ci sub $16, %r13 3138c2ecf20Sopenharmony_ci jmp _initial_blocks_\@ 3148c2ecf20Sopenharmony_ci_initial_num_blocks_is_0_\@: 3158c2ecf20Sopenharmony_ci INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ 3168c2ecf20Sopenharmony_ci%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation 3178c2ecf20Sopenharmony_ci_initial_blocks_\@: 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci # Main loop - Encrypt/Decrypt remaining blocks 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci test %r13, %r13 3228c2ecf20Sopenharmony_ci je _zero_cipher_left_\@ 3238c2ecf20Sopenharmony_ci sub $64, %r13 3248c2ecf20Sopenharmony_ci je _four_cipher_left_\@ 3258c2ecf20Sopenharmony_ci_crypt_by_4_\@: 3268c2ecf20Sopenharmony_ci GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \ 3278c2ecf20Sopenharmony_ci %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \ 3288c2ecf20Sopenharmony_ci %xmm7, %xmm8, enc 3298c2ecf20Sopenharmony_ci add $64, %r11 3308c2ecf20Sopenharmony_ci sub $64, %r13 3318c2ecf20Sopenharmony_ci jne _crypt_by_4_\@ 3328c2ecf20Sopenharmony_ci_four_cipher_left_\@: 3338c2ecf20Sopenharmony_ci GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ 3348c2ecf20Sopenharmony_ci%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 3358c2ecf20Sopenharmony_ci_zero_cipher_left_\@: 3368c2ecf20Sopenharmony_ci movdqu %xmm8, AadHash(%arg2) 3378c2ecf20Sopenharmony_ci movdqu %xmm0, CurCount(%arg2) 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci mov %arg5, %r13 3408c2ecf20Sopenharmony_ci and $15, %r13 # %r13 = arg5 (mod 16) 3418c2ecf20Sopenharmony_ci je _multiple_of_16_bytes_\@ 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci mov %r13, PBlockLen(%arg2) 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci # Handle the last <16 Byte block separately 3468c2ecf20Sopenharmony_ci paddd ONE(%rip), %xmm0 # INCR CNT to get Yn 3478c2ecf20Sopenharmony_ci movdqu %xmm0, CurCount(%arg2) 3488c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm10 3498c2ecf20Sopenharmony_ci pshufb %xmm10, %xmm0 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) 3528c2ecf20Sopenharmony_ci movdqu %xmm0, PBlockEncKey(%arg2) 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci cmp $16, %arg5 3558c2ecf20Sopenharmony_ci jge _large_enough_update_\@ 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci lea (%arg4,%r11,1), %r10 3588c2ecf20Sopenharmony_ci mov %r13, %r12 3598c2ecf20Sopenharmony_ci READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 3608c2ecf20Sopenharmony_ci jmp _data_read_\@ 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci_large_enough_update_\@: 3638c2ecf20Sopenharmony_ci sub $16, %r11 3648c2ecf20Sopenharmony_ci add %r13, %r11 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci # receive the last <16 Byte block 3678c2ecf20Sopenharmony_ci movdqu (%arg4, %r11, 1), %xmm1 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci sub %r13, %r11 3708c2ecf20Sopenharmony_ci add $16, %r11 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci lea SHIFT_MASK+16(%rip), %r12 3738c2ecf20Sopenharmony_ci # adjust the shuffle mask pointer to be able to shift 16-r13 bytes 3748c2ecf20Sopenharmony_ci # (r13 is the number of bytes in plaintext mod 16) 3758c2ecf20Sopenharmony_ci sub %r13, %r12 3768c2ecf20Sopenharmony_ci # get the appropriate shuffle mask 3778c2ecf20Sopenharmony_ci movdqu (%r12), %xmm2 3788c2ecf20Sopenharmony_ci # shift right 16-r13 bytes 3798c2ecf20Sopenharmony_ci pshufb %xmm2, %xmm1 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci_data_read_\@: 3828c2ecf20Sopenharmony_ci lea ALL_F+16(%rip), %r12 3838c2ecf20Sopenharmony_ci sub %r13, %r12 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci.ifc \operation, dec 3868c2ecf20Sopenharmony_ci movdqa %xmm1, %xmm2 3878c2ecf20Sopenharmony_ci.endif 3888c2ecf20Sopenharmony_ci pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn) 3898c2ecf20Sopenharmony_ci movdqu (%r12), %xmm1 3908c2ecf20Sopenharmony_ci # get the appropriate mask to mask out top 16-r13 bytes of xmm0 3918c2ecf20Sopenharmony_ci pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 3928c2ecf20Sopenharmony_ci.ifc \operation, dec 3938c2ecf20Sopenharmony_ci pand %xmm1, %xmm2 3948c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm10 3958c2ecf20Sopenharmony_ci pshufb %xmm10 ,%xmm2 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci pxor %xmm2, %xmm8 3988c2ecf20Sopenharmony_ci.else 3998c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm10 4008c2ecf20Sopenharmony_ci pshufb %xmm10,%xmm0 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci pxor %xmm0, %xmm8 4038c2ecf20Sopenharmony_ci.endif 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci movdqu %xmm8, AadHash(%arg2) 4068c2ecf20Sopenharmony_ci.ifc \operation, enc 4078c2ecf20Sopenharmony_ci # GHASH computation for the last <16 byte block 4088c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm10 4098c2ecf20Sopenharmony_ci # shuffle xmm0 back to output as ciphertext 4108c2ecf20Sopenharmony_ci pshufb %xmm10, %xmm0 4118c2ecf20Sopenharmony_ci.endif 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci # Output %r13 bytes 4148c2ecf20Sopenharmony_ci movq %xmm0, %rax 4158c2ecf20Sopenharmony_ci cmp $8, %r13 4168c2ecf20Sopenharmony_ci jle _less_than_8_bytes_left_\@ 4178c2ecf20Sopenharmony_ci mov %rax, (%arg3 , %r11, 1) 4188c2ecf20Sopenharmony_ci add $8, %r11 4198c2ecf20Sopenharmony_ci psrldq $8, %xmm0 4208c2ecf20Sopenharmony_ci movq %xmm0, %rax 4218c2ecf20Sopenharmony_ci sub $8, %r13 4228c2ecf20Sopenharmony_ci_less_than_8_bytes_left_\@: 4238c2ecf20Sopenharmony_ci mov %al, (%arg3, %r11, 1) 4248c2ecf20Sopenharmony_ci add $1, %r11 4258c2ecf20Sopenharmony_ci shr $8, %rax 4268c2ecf20Sopenharmony_ci sub $1, %r13 4278c2ecf20Sopenharmony_ci jne _less_than_8_bytes_left_\@ 4288c2ecf20Sopenharmony_ci_multiple_of_16_bytes_\@: 4298c2ecf20Sopenharmony_ci.endm 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci# GCM_COMPLETE Finishes update of tag of last partial block 4328c2ecf20Sopenharmony_ci# Output: Authorization Tag (AUTH_TAG) 4338c2ecf20Sopenharmony_ci# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 4348c2ecf20Sopenharmony_ci.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN 4358c2ecf20Sopenharmony_ci movdqu AadHash(%arg2), %xmm8 4368c2ecf20Sopenharmony_ci movdqu HashKey(%arg2), %xmm13 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci mov PBlockLen(%arg2), %r12 4398c2ecf20Sopenharmony_ci 4408c2ecf20Sopenharmony_ci test %r12, %r12 4418c2ecf20Sopenharmony_ci je _partial_done\@ 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci_partial_done\@: 4468c2ecf20Sopenharmony_ci mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) 4478c2ecf20Sopenharmony_ci shl $3, %r12 # convert into number of bits 4488c2ecf20Sopenharmony_ci movd %r12d, %xmm15 # len(A) in %xmm15 4498c2ecf20Sopenharmony_ci mov InLen(%arg2), %r12 4508c2ecf20Sopenharmony_ci shl $3, %r12 # len(C) in bits (*128) 4518c2ecf20Sopenharmony_ci movq %r12, %xmm1 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 4548c2ecf20Sopenharmony_ci pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) 4558c2ecf20Sopenharmony_ci pxor %xmm15, %xmm8 4568c2ecf20Sopenharmony_ci GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 4578c2ecf20Sopenharmony_ci # final GHASH computation 4588c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm10 4598c2ecf20Sopenharmony_ci pshufb %xmm10, %xmm8 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 4628c2ecf20Sopenharmony_ci ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) 4638c2ecf20Sopenharmony_ci pxor %xmm8, %xmm0 4648c2ecf20Sopenharmony_ci_return_T_\@: 4658c2ecf20Sopenharmony_ci mov \AUTHTAG, %r10 # %r10 = authTag 4668c2ecf20Sopenharmony_ci mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len 4678c2ecf20Sopenharmony_ci cmp $16, %r11 4688c2ecf20Sopenharmony_ci je _T_16_\@ 4698c2ecf20Sopenharmony_ci cmp $8, %r11 4708c2ecf20Sopenharmony_ci jl _T_4_\@ 4718c2ecf20Sopenharmony_ci_T_8_\@: 4728c2ecf20Sopenharmony_ci movq %xmm0, %rax 4738c2ecf20Sopenharmony_ci mov %rax, (%r10) 4748c2ecf20Sopenharmony_ci add $8, %r10 4758c2ecf20Sopenharmony_ci sub $8, %r11 4768c2ecf20Sopenharmony_ci psrldq $8, %xmm0 4778c2ecf20Sopenharmony_ci test %r11, %r11 4788c2ecf20Sopenharmony_ci je _return_T_done_\@ 4798c2ecf20Sopenharmony_ci_T_4_\@: 4808c2ecf20Sopenharmony_ci movd %xmm0, %eax 4818c2ecf20Sopenharmony_ci mov %eax, (%r10) 4828c2ecf20Sopenharmony_ci add $4, %r10 4838c2ecf20Sopenharmony_ci sub $4, %r11 4848c2ecf20Sopenharmony_ci psrldq $4, %xmm0 4858c2ecf20Sopenharmony_ci test %r11, %r11 4868c2ecf20Sopenharmony_ci je _return_T_done_\@ 4878c2ecf20Sopenharmony_ci_T_123_\@: 4888c2ecf20Sopenharmony_ci movd %xmm0, %eax 4898c2ecf20Sopenharmony_ci cmp $2, %r11 4908c2ecf20Sopenharmony_ci jl _T_1_\@ 4918c2ecf20Sopenharmony_ci mov %ax, (%r10) 4928c2ecf20Sopenharmony_ci cmp $2, %r11 4938c2ecf20Sopenharmony_ci je _return_T_done_\@ 4948c2ecf20Sopenharmony_ci add $2, %r10 4958c2ecf20Sopenharmony_ci sar $16, %eax 4968c2ecf20Sopenharmony_ci_T_1_\@: 4978c2ecf20Sopenharmony_ci mov %al, (%r10) 4988c2ecf20Sopenharmony_ci jmp _return_T_done_\@ 4998c2ecf20Sopenharmony_ci_T_16_\@: 5008c2ecf20Sopenharmony_ci movdqu %xmm0, (%r10) 5018c2ecf20Sopenharmony_ci_return_T_done_\@: 5028c2ecf20Sopenharmony_ci.endm 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci#ifdef __x86_64__ 5058c2ecf20Sopenharmony_ci/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) 5068c2ecf20Sopenharmony_ci* 5078c2ecf20Sopenharmony_ci* 5088c2ecf20Sopenharmony_ci* Input: A and B (128-bits each, bit-reflected) 5098c2ecf20Sopenharmony_ci* Output: C = A*B*x mod poly, (i.e. >>1 ) 5108c2ecf20Sopenharmony_ci* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input 5118c2ecf20Sopenharmony_ci* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. 5128c2ecf20Sopenharmony_ci* 5138c2ecf20Sopenharmony_ci*/ 5148c2ecf20Sopenharmony_ci.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5 5158c2ecf20Sopenharmony_ci movdqa \GH, \TMP1 5168c2ecf20Sopenharmony_ci pshufd $78, \GH, \TMP2 5178c2ecf20Sopenharmony_ci pshufd $78, \HK, \TMP3 5188c2ecf20Sopenharmony_ci pxor \GH, \TMP2 # TMP2 = a1+a0 5198c2ecf20Sopenharmony_ci pxor \HK, \TMP3 # TMP3 = b1+b0 5208c2ecf20Sopenharmony_ci pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1 5218c2ecf20Sopenharmony_ci pclmulqdq $0x00, \HK, \GH # GH = a0*b0 5228c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) 5238c2ecf20Sopenharmony_ci pxor \GH, \TMP2 5248c2ecf20Sopenharmony_ci pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0) 5258c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP3 5268c2ecf20Sopenharmony_ci pslldq $8, \TMP3 # left shift TMP3 2 DWs 5278c2ecf20Sopenharmony_ci psrldq $8, \TMP2 # right shift TMP2 2 DWs 5288c2ecf20Sopenharmony_ci pxor \TMP3, \GH 5298c2ecf20Sopenharmony_ci pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci # first phase of the reduction 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci movdqa \GH, \TMP2 5348c2ecf20Sopenharmony_ci movdqa \GH, \TMP3 5358c2ecf20Sopenharmony_ci movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4 5368c2ecf20Sopenharmony_ci # in in order to perform 5378c2ecf20Sopenharmony_ci # independent shifts 5388c2ecf20Sopenharmony_ci pslld $31, \TMP2 # packed right shift <<31 5398c2ecf20Sopenharmony_ci pslld $30, \TMP3 # packed right shift <<30 5408c2ecf20Sopenharmony_ci pslld $25, \TMP4 # packed right shift <<25 5418c2ecf20Sopenharmony_ci pxor \TMP3, \TMP2 # xor the shifted versions 5428c2ecf20Sopenharmony_ci pxor \TMP4, \TMP2 5438c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP5 5448c2ecf20Sopenharmony_ci psrldq $4, \TMP5 # right shift TMP5 1 DW 5458c2ecf20Sopenharmony_ci pslldq $12, \TMP2 # left shift TMP2 3 DWs 5468c2ecf20Sopenharmony_ci pxor \TMP2, \GH 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci # second phase of the reduction 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_ci movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4 5518c2ecf20Sopenharmony_ci # in in order to perform 5528c2ecf20Sopenharmony_ci # independent shifts 5538c2ecf20Sopenharmony_ci movdqa \GH,\TMP3 5548c2ecf20Sopenharmony_ci movdqa \GH,\TMP4 5558c2ecf20Sopenharmony_ci psrld $1,\TMP2 # packed left shift >>1 5568c2ecf20Sopenharmony_ci psrld $2,\TMP3 # packed left shift >>2 5578c2ecf20Sopenharmony_ci psrld $7,\TMP4 # packed left shift >>7 5588c2ecf20Sopenharmony_ci pxor \TMP3,\TMP2 # xor the shifted versions 5598c2ecf20Sopenharmony_ci pxor \TMP4,\TMP2 5608c2ecf20Sopenharmony_ci pxor \TMP5, \TMP2 5618c2ecf20Sopenharmony_ci pxor \TMP2, \GH 5628c2ecf20Sopenharmony_ci pxor \TMP1, \GH # result is in TMP1 5638c2ecf20Sopenharmony_ci.endm 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_ci# Reads DLEN bytes starting at DPTR and stores in XMMDst 5668c2ecf20Sopenharmony_ci# where 0 < DLEN < 16 5678c2ecf20Sopenharmony_ci# Clobbers %rax, DLEN and XMM1 5688c2ecf20Sopenharmony_ci.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst 5698c2ecf20Sopenharmony_ci cmp $8, \DLEN 5708c2ecf20Sopenharmony_ci jl _read_lt8_\@ 5718c2ecf20Sopenharmony_ci mov (\DPTR), %rax 5728c2ecf20Sopenharmony_ci movq %rax, \XMMDst 5738c2ecf20Sopenharmony_ci sub $8, \DLEN 5748c2ecf20Sopenharmony_ci jz _done_read_partial_block_\@ 5758c2ecf20Sopenharmony_ci xor %eax, %eax 5768c2ecf20Sopenharmony_ci_read_next_byte_\@: 5778c2ecf20Sopenharmony_ci shl $8, %rax 5788c2ecf20Sopenharmony_ci mov 7(\DPTR, \DLEN, 1), %al 5798c2ecf20Sopenharmony_ci dec \DLEN 5808c2ecf20Sopenharmony_ci jnz _read_next_byte_\@ 5818c2ecf20Sopenharmony_ci movq %rax, \XMM1 5828c2ecf20Sopenharmony_ci pslldq $8, \XMM1 5838c2ecf20Sopenharmony_ci por \XMM1, \XMMDst 5848c2ecf20Sopenharmony_ci jmp _done_read_partial_block_\@ 5858c2ecf20Sopenharmony_ci_read_lt8_\@: 5868c2ecf20Sopenharmony_ci xor %eax, %eax 5878c2ecf20Sopenharmony_ci_read_next_byte_lt8_\@: 5888c2ecf20Sopenharmony_ci shl $8, %rax 5898c2ecf20Sopenharmony_ci mov -1(\DPTR, \DLEN, 1), %al 5908c2ecf20Sopenharmony_ci dec \DLEN 5918c2ecf20Sopenharmony_ci jnz _read_next_byte_lt8_\@ 5928c2ecf20Sopenharmony_ci movq %rax, \XMMDst 5938c2ecf20Sopenharmony_ci_done_read_partial_block_\@: 5948c2ecf20Sopenharmony_ci.endm 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. 5978c2ecf20Sopenharmony_ci# clobbers r10-11, xmm14 5988c2ecf20Sopenharmony_ci.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \ 5998c2ecf20Sopenharmony_ci TMP6 TMP7 6008c2ecf20Sopenharmony_ci MOVADQ SHUF_MASK(%rip), %xmm14 6018c2ecf20Sopenharmony_ci mov \AAD, %r10 # %r10 = AAD 6028c2ecf20Sopenharmony_ci mov \AADLEN, %r11 # %r11 = aadLen 6038c2ecf20Sopenharmony_ci pxor \TMP7, \TMP7 6048c2ecf20Sopenharmony_ci pxor \TMP6, \TMP6 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ci cmp $16, %r11 6078c2ecf20Sopenharmony_ci jl _get_AAD_rest\@ 6088c2ecf20Sopenharmony_ci_get_AAD_blocks\@: 6098c2ecf20Sopenharmony_ci movdqu (%r10), \TMP7 6108c2ecf20Sopenharmony_ci pshufb %xmm14, \TMP7 # byte-reflect the AAD data 6118c2ecf20Sopenharmony_ci pxor \TMP7, \TMP6 6128c2ecf20Sopenharmony_ci GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 6138c2ecf20Sopenharmony_ci add $16, %r10 6148c2ecf20Sopenharmony_ci sub $16, %r11 6158c2ecf20Sopenharmony_ci cmp $16, %r11 6168c2ecf20Sopenharmony_ci jge _get_AAD_blocks\@ 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci movdqu \TMP6, \TMP7 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci /* read the last <16B of AAD */ 6218c2ecf20Sopenharmony_ci_get_AAD_rest\@: 6228c2ecf20Sopenharmony_ci test %r11, %r11 6238c2ecf20Sopenharmony_ci je _get_AAD_done\@ 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 6268c2ecf20Sopenharmony_ci pshufb %xmm14, \TMP7 # byte-reflect the AAD data 6278c2ecf20Sopenharmony_ci pxor \TMP6, \TMP7 6288c2ecf20Sopenharmony_ci GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 6298c2ecf20Sopenharmony_ci movdqu \TMP7, \TMP6 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci_get_AAD_done\@: 6328c2ecf20Sopenharmony_ci movdqu \TMP6, AadHash(%arg2) 6338c2ecf20Sopenharmony_ci.endm 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks 6368c2ecf20Sopenharmony_ci# between update calls. 6378c2ecf20Sopenharmony_ci# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK 6388c2ecf20Sopenharmony_ci# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context 6398c2ecf20Sopenharmony_ci# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 6408c2ecf20Sopenharmony_ci.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ 6418c2ecf20Sopenharmony_ci AAD_HASH operation 6428c2ecf20Sopenharmony_ci mov PBlockLen(%arg2), %r13 6438c2ecf20Sopenharmony_ci test %r13, %r13 6448c2ecf20Sopenharmony_ci je _partial_block_done_\@ # Leave Macro if no partial blocks 6458c2ecf20Sopenharmony_ci # Read in input data without over reading 6468c2ecf20Sopenharmony_ci cmp $16, \PLAIN_CYPH_LEN 6478c2ecf20Sopenharmony_ci jl _fewer_than_16_bytes_\@ 6488c2ecf20Sopenharmony_ci movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm 6498c2ecf20Sopenharmony_ci jmp _data_read_\@ 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci_fewer_than_16_bytes_\@: 6528c2ecf20Sopenharmony_ci lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 6538c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r12 6548c2ecf20Sopenharmony_ci READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci mov PBlockLen(%arg2), %r13 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci_data_read_\@: # Finished reading in data 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_ci movdqu PBlockEncKey(%arg2), %xmm9 6618c2ecf20Sopenharmony_ci movdqu HashKey(%arg2), %xmm13 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci lea SHIFT_MASK(%rip), %r12 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci # adjust the shuffle mask pointer to be able to shift r13 bytes 6668c2ecf20Sopenharmony_ci # r16-r13 is the number of bytes in plaintext mod 16) 6678c2ecf20Sopenharmony_ci add %r13, %r12 6688c2ecf20Sopenharmony_ci movdqu (%r12), %xmm2 # get the appropriate shuffle mask 6698c2ecf20Sopenharmony_ci pshufb %xmm2, %xmm9 # shift right r13 bytes 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci.ifc \operation, dec 6728c2ecf20Sopenharmony_ci movdqa %xmm1, %xmm3 6738c2ecf20Sopenharmony_ci pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r10 6768c2ecf20Sopenharmony_ci add %r13, %r10 6778c2ecf20Sopenharmony_ci # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling 6788c2ecf20Sopenharmony_ci sub $16, %r10 6798c2ecf20Sopenharmony_ci # Determine if if partial block is not being filled and 6808c2ecf20Sopenharmony_ci # shift mask accordingly 6818c2ecf20Sopenharmony_ci jge _no_extra_mask_1_\@ 6828c2ecf20Sopenharmony_ci sub %r10, %r12 6838c2ecf20Sopenharmony_ci_no_extra_mask_1_\@: 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 6868c2ecf20Sopenharmony_ci # get the appropriate mask to mask out bottom r13 bytes of xmm9 6878c2ecf20Sopenharmony_ci pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci pand %xmm1, %xmm3 6908c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm10 6918c2ecf20Sopenharmony_ci pshufb %xmm10, %xmm3 6928c2ecf20Sopenharmony_ci pshufb %xmm2, %xmm3 6938c2ecf20Sopenharmony_ci pxor %xmm3, \AAD_HASH 6948c2ecf20Sopenharmony_ci 6958c2ecf20Sopenharmony_ci test %r10, %r10 6968c2ecf20Sopenharmony_ci jl _partial_incomplete_1_\@ 6978c2ecf20Sopenharmony_ci 6988c2ecf20Sopenharmony_ci # GHASH computation for the last <16 Byte block 6998c2ecf20Sopenharmony_ci GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 7008c2ecf20Sopenharmony_ci xor %eax, %eax 7018c2ecf20Sopenharmony_ci 7028c2ecf20Sopenharmony_ci mov %rax, PBlockLen(%arg2) 7038c2ecf20Sopenharmony_ci jmp _dec_done_\@ 7048c2ecf20Sopenharmony_ci_partial_incomplete_1_\@: 7058c2ecf20Sopenharmony_ci add \PLAIN_CYPH_LEN, PBlockLen(%arg2) 7068c2ecf20Sopenharmony_ci_dec_done_\@: 7078c2ecf20Sopenharmony_ci movdqu \AAD_HASH, AadHash(%arg2) 7088c2ecf20Sopenharmony_ci.else 7098c2ecf20Sopenharmony_ci pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn) 7108c2ecf20Sopenharmony_ci 7118c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r10 7128c2ecf20Sopenharmony_ci add %r13, %r10 7138c2ecf20Sopenharmony_ci # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling 7148c2ecf20Sopenharmony_ci sub $16, %r10 7158c2ecf20Sopenharmony_ci # Determine if if partial block is not being filled and 7168c2ecf20Sopenharmony_ci # shift mask accordingly 7178c2ecf20Sopenharmony_ci jge _no_extra_mask_2_\@ 7188c2ecf20Sopenharmony_ci sub %r10, %r12 7198c2ecf20Sopenharmony_ci_no_extra_mask_2_\@: 7208c2ecf20Sopenharmony_ci 7218c2ecf20Sopenharmony_ci movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 7228c2ecf20Sopenharmony_ci # get the appropriate mask to mask out bottom r13 bytes of xmm9 7238c2ecf20Sopenharmony_ci pand %xmm1, %xmm9 7248c2ecf20Sopenharmony_ci 7258c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm1 7268c2ecf20Sopenharmony_ci pshufb %xmm1, %xmm9 7278c2ecf20Sopenharmony_ci pshufb %xmm2, %xmm9 7288c2ecf20Sopenharmony_ci pxor %xmm9, \AAD_HASH 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci test %r10, %r10 7318c2ecf20Sopenharmony_ci jl _partial_incomplete_2_\@ 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_ci # GHASH computation for the last <16 Byte block 7348c2ecf20Sopenharmony_ci GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 7358c2ecf20Sopenharmony_ci xor %eax, %eax 7368c2ecf20Sopenharmony_ci 7378c2ecf20Sopenharmony_ci mov %rax, PBlockLen(%arg2) 7388c2ecf20Sopenharmony_ci jmp _encode_done_\@ 7398c2ecf20Sopenharmony_ci_partial_incomplete_2_\@: 7408c2ecf20Sopenharmony_ci add \PLAIN_CYPH_LEN, PBlockLen(%arg2) 7418c2ecf20Sopenharmony_ci_encode_done_\@: 7428c2ecf20Sopenharmony_ci movdqu \AAD_HASH, AadHash(%arg2) 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm10 7458c2ecf20Sopenharmony_ci # shuffle xmm9 back to output as ciphertext 7468c2ecf20Sopenharmony_ci pshufb %xmm10, %xmm9 7478c2ecf20Sopenharmony_ci pshufb %xmm2, %xmm9 7488c2ecf20Sopenharmony_ci.endif 7498c2ecf20Sopenharmony_ci # output encrypted Bytes 7508c2ecf20Sopenharmony_ci test %r10, %r10 7518c2ecf20Sopenharmony_ci jl _partial_fill_\@ 7528c2ecf20Sopenharmony_ci mov %r13, %r12 7538c2ecf20Sopenharmony_ci mov $16, %r13 7548c2ecf20Sopenharmony_ci # Set r13 to be the number of bytes to write out 7558c2ecf20Sopenharmony_ci sub %r12, %r13 7568c2ecf20Sopenharmony_ci jmp _count_set_\@ 7578c2ecf20Sopenharmony_ci_partial_fill_\@: 7588c2ecf20Sopenharmony_ci mov \PLAIN_CYPH_LEN, %r13 7598c2ecf20Sopenharmony_ci_count_set_\@: 7608c2ecf20Sopenharmony_ci movdqa %xmm9, %xmm0 7618c2ecf20Sopenharmony_ci movq %xmm0, %rax 7628c2ecf20Sopenharmony_ci cmp $8, %r13 7638c2ecf20Sopenharmony_ci jle _less_than_8_bytes_left_\@ 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) 7668c2ecf20Sopenharmony_ci add $8, \DATA_OFFSET 7678c2ecf20Sopenharmony_ci psrldq $8, %xmm0 7688c2ecf20Sopenharmony_ci movq %xmm0, %rax 7698c2ecf20Sopenharmony_ci sub $8, %r13 7708c2ecf20Sopenharmony_ci_less_than_8_bytes_left_\@: 7718c2ecf20Sopenharmony_ci movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) 7728c2ecf20Sopenharmony_ci add $1, \DATA_OFFSET 7738c2ecf20Sopenharmony_ci shr $8, %rax 7748c2ecf20Sopenharmony_ci sub $1, %r13 7758c2ecf20Sopenharmony_ci jne _less_than_8_bytes_left_\@ 7768c2ecf20Sopenharmony_ci_partial_block_done_\@: 7778c2ecf20Sopenharmony_ci.endm # PARTIAL_BLOCK 7788c2ecf20Sopenharmony_ci 7798c2ecf20Sopenharmony_ci/* 7808c2ecf20Sopenharmony_ci* if a = number of total plaintext bytes 7818c2ecf20Sopenharmony_ci* b = floor(a/16) 7828c2ecf20Sopenharmony_ci* num_initial_blocks = b mod 4 7838c2ecf20Sopenharmony_ci* encrypt the initial num_initial_blocks blocks and apply ghash on 7848c2ecf20Sopenharmony_ci* the ciphertext 7858c2ecf20Sopenharmony_ci* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers 7868c2ecf20Sopenharmony_ci* are clobbered 7878c2ecf20Sopenharmony_ci* arg1, %arg2, %arg3 are used as a pointer only, not modified 7888c2ecf20Sopenharmony_ci*/ 7898c2ecf20Sopenharmony_ci 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_ci.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ 7928c2ecf20Sopenharmony_ci XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation 7938c2ecf20Sopenharmony_ci MOVADQ SHUF_MASK(%rip), %xmm14 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 7968c2ecf20Sopenharmony_ci 7978c2ecf20Sopenharmony_ci # start AES for num_initial_blocks blocks 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_ci movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci.if (\i == 5) || (\i == 6) || (\i == 7) 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci MOVADQ ONE(%RIP),\TMP1 8048c2ecf20Sopenharmony_ci MOVADQ 0(%arg1),\TMP2 8058c2ecf20Sopenharmony_ci.irpc index, \i_seq 8068c2ecf20Sopenharmony_ci paddd \TMP1, \XMM0 # INCR Y0 8078c2ecf20Sopenharmony_ci.ifc \operation, dec 8088c2ecf20Sopenharmony_ci movdqa \XMM0, %xmm\index 8098c2ecf20Sopenharmony_ci.else 8108c2ecf20Sopenharmony_ci MOVADQ \XMM0, %xmm\index 8118c2ecf20Sopenharmony_ci.endif 8128c2ecf20Sopenharmony_ci pshufb %xmm14, %xmm\index # perform a 16 byte swap 8138c2ecf20Sopenharmony_ci pxor \TMP2, %xmm\index 8148c2ecf20Sopenharmony_ci.endr 8158c2ecf20Sopenharmony_ci lea 0x10(%arg1),%r10 8168c2ecf20Sopenharmony_ci mov keysize,%eax 8178c2ecf20Sopenharmony_ci shr $2,%eax # 128->4, 192->6, 256->8 8188c2ecf20Sopenharmony_ci add $5,%eax # 128->9, 192->11, 256->13 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ciaes_loop_initial_\@: 8218c2ecf20Sopenharmony_ci MOVADQ (%r10),\TMP1 8228c2ecf20Sopenharmony_ci.irpc index, \i_seq 8238c2ecf20Sopenharmony_ci aesenc \TMP1, %xmm\index 8248c2ecf20Sopenharmony_ci.endr 8258c2ecf20Sopenharmony_ci add $16,%r10 8268c2ecf20Sopenharmony_ci sub $1,%eax 8278c2ecf20Sopenharmony_ci jnz aes_loop_initial_\@ 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_ci MOVADQ (%r10), \TMP1 8308c2ecf20Sopenharmony_ci.irpc index, \i_seq 8318c2ecf20Sopenharmony_ci aesenclast \TMP1, %xmm\index # Last Round 8328c2ecf20Sopenharmony_ci.endr 8338c2ecf20Sopenharmony_ci.irpc index, \i_seq 8348c2ecf20Sopenharmony_ci movdqu (%arg4 , %r11, 1), \TMP1 8358c2ecf20Sopenharmony_ci pxor \TMP1, %xmm\index 8368c2ecf20Sopenharmony_ci movdqu %xmm\index, (%arg3 , %r11, 1) 8378c2ecf20Sopenharmony_ci # write back plaintext/ciphertext for num_initial_blocks 8388c2ecf20Sopenharmony_ci add $16, %r11 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci.ifc \operation, dec 8418c2ecf20Sopenharmony_ci movdqa \TMP1, %xmm\index 8428c2ecf20Sopenharmony_ci.endif 8438c2ecf20Sopenharmony_ci pshufb %xmm14, %xmm\index 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_ci # prepare plaintext/ciphertext for GHASH computation 8468c2ecf20Sopenharmony_ci.endr 8478c2ecf20Sopenharmony_ci.endif 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ci # apply GHASH on num_initial_blocks blocks 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci.if \i == 5 8528c2ecf20Sopenharmony_ci pxor %xmm5, %xmm6 8538c2ecf20Sopenharmony_ci GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 8548c2ecf20Sopenharmony_ci pxor %xmm6, %xmm7 8558c2ecf20Sopenharmony_ci GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 8568c2ecf20Sopenharmony_ci pxor %xmm7, %xmm8 8578c2ecf20Sopenharmony_ci GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 8588c2ecf20Sopenharmony_ci.elseif \i == 6 8598c2ecf20Sopenharmony_ci pxor %xmm6, %xmm7 8608c2ecf20Sopenharmony_ci GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 8618c2ecf20Sopenharmony_ci pxor %xmm7, %xmm8 8628c2ecf20Sopenharmony_ci GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 8638c2ecf20Sopenharmony_ci.elseif \i == 7 8648c2ecf20Sopenharmony_ci pxor %xmm7, %xmm8 8658c2ecf20Sopenharmony_ci GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 8668c2ecf20Sopenharmony_ci.endif 8678c2ecf20Sopenharmony_ci cmp $64, %r13 8688c2ecf20Sopenharmony_ci jl _initial_blocks_done\@ 8698c2ecf20Sopenharmony_ci # no need for precomputed values 8708c2ecf20Sopenharmony_ci/* 8718c2ecf20Sopenharmony_ci* 8728c2ecf20Sopenharmony_ci* Precomputations for HashKey parallel with encryption of first 4 blocks. 8738c2ecf20Sopenharmony_ci* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i 8748c2ecf20Sopenharmony_ci*/ 8758c2ecf20Sopenharmony_ci MOVADQ ONE(%RIP),\TMP1 8768c2ecf20Sopenharmony_ci paddd \TMP1, \XMM0 # INCR Y0 8778c2ecf20Sopenharmony_ci MOVADQ \XMM0, \XMM1 8788c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM1 # perform a 16 byte swap 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_ci paddd \TMP1, \XMM0 # INCR Y0 8818c2ecf20Sopenharmony_ci MOVADQ \XMM0, \XMM2 8828c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM2 # perform a 16 byte swap 8838c2ecf20Sopenharmony_ci 8848c2ecf20Sopenharmony_ci paddd \TMP1, \XMM0 # INCR Y0 8858c2ecf20Sopenharmony_ci MOVADQ \XMM0, \XMM3 8868c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM3 # perform a 16 byte swap 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci paddd \TMP1, \XMM0 # INCR Y0 8898c2ecf20Sopenharmony_ci MOVADQ \XMM0, \XMM4 8908c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM4 # perform a 16 byte swap 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci MOVADQ 0(%arg1),\TMP1 8938c2ecf20Sopenharmony_ci pxor \TMP1, \XMM1 8948c2ecf20Sopenharmony_ci pxor \TMP1, \XMM2 8958c2ecf20Sopenharmony_ci pxor \TMP1, \XMM3 8968c2ecf20Sopenharmony_ci pxor \TMP1, \XMM4 8978c2ecf20Sopenharmony_ci.irpc index, 1234 # do 4 rounds 8988c2ecf20Sopenharmony_ci movaps 0x10*\index(%arg1), \TMP1 8998c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM1 9008c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM2 9018c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM3 9028c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM4 9038c2ecf20Sopenharmony_ci.endr 9048c2ecf20Sopenharmony_ci.irpc index, 56789 # do next 5 rounds 9058c2ecf20Sopenharmony_ci movaps 0x10*\index(%arg1), \TMP1 9068c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM1 9078c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM2 9088c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM3 9098c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM4 9108c2ecf20Sopenharmony_ci.endr 9118c2ecf20Sopenharmony_ci lea 0xa0(%arg1),%r10 9128c2ecf20Sopenharmony_ci mov keysize,%eax 9138c2ecf20Sopenharmony_ci shr $2,%eax # 128->4, 192->6, 256->8 9148c2ecf20Sopenharmony_ci sub $4,%eax # 128->0, 192->2, 256->4 9158c2ecf20Sopenharmony_ci jz aes_loop_pre_done\@ 9168c2ecf20Sopenharmony_ci 9178c2ecf20Sopenharmony_ciaes_loop_pre_\@: 9188c2ecf20Sopenharmony_ci MOVADQ (%r10),\TMP2 9198c2ecf20Sopenharmony_ci.irpc index, 1234 9208c2ecf20Sopenharmony_ci aesenc \TMP2, %xmm\index 9218c2ecf20Sopenharmony_ci.endr 9228c2ecf20Sopenharmony_ci add $16,%r10 9238c2ecf20Sopenharmony_ci sub $1,%eax 9248c2ecf20Sopenharmony_ci jnz aes_loop_pre_\@ 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ciaes_loop_pre_done\@: 9278c2ecf20Sopenharmony_ci MOVADQ (%r10), \TMP2 9288c2ecf20Sopenharmony_ci aesenclast \TMP2, \XMM1 9298c2ecf20Sopenharmony_ci aesenclast \TMP2, \XMM2 9308c2ecf20Sopenharmony_ci aesenclast \TMP2, \XMM3 9318c2ecf20Sopenharmony_ci aesenclast \TMP2, \XMM4 9328c2ecf20Sopenharmony_ci movdqu 16*0(%arg4 , %r11 , 1), \TMP1 9338c2ecf20Sopenharmony_ci pxor \TMP1, \XMM1 9348c2ecf20Sopenharmony_ci.ifc \operation, dec 9358c2ecf20Sopenharmony_ci movdqu \XMM1, 16*0(%arg3 , %r11 , 1) 9368c2ecf20Sopenharmony_ci movdqa \TMP1, \XMM1 9378c2ecf20Sopenharmony_ci.endif 9388c2ecf20Sopenharmony_ci movdqu 16*1(%arg4 , %r11 , 1), \TMP1 9398c2ecf20Sopenharmony_ci pxor \TMP1, \XMM2 9408c2ecf20Sopenharmony_ci.ifc \operation, dec 9418c2ecf20Sopenharmony_ci movdqu \XMM2, 16*1(%arg3 , %r11 , 1) 9428c2ecf20Sopenharmony_ci movdqa \TMP1, \XMM2 9438c2ecf20Sopenharmony_ci.endif 9448c2ecf20Sopenharmony_ci movdqu 16*2(%arg4 , %r11 , 1), \TMP1 9458c2ecf20Sopenharmony_ci pxor \TMP1, \XMM3 9468c2ecf20Sopenharmony_ci.ifc \operation, dec 9478c2ecf20Sopenharmony_ci movdqu \XMM3, 16*2(%arg3 , %r11 , 1) 9488c2ecf20Sopenharmony_ci movdqa \TMP1, \XMM3 9498c2ecf20Sopenharmony_ci.endif 9508c2ecf20Sopenharmony_ci movdqu 16*3(%arg4 , %r11 , 1), \TMP1 9518c2ecf20Sopenharmony_ci pxor \TMP1, \XMM4 9528c2ecf20Sopenharmony_ci.ifc \operation, dec 9538c2ecf20Sopenharmony_ci movdqu \XMM4, 16*3(%arg3 , %r11 , 1) 9548c2ecf20Sopenharmony_ci movdqa \TMP1, \XMM4 9558c2ecf20Sopenharmony_ci.else 9568c2ecf20Sopenharmony_ci movdqu \XMM1, 16*0(%arg3 , %r11 , 1) 9578c2ecf20Sopenharmony_ci movdqu \XMM2, 16*1(%arg3 , %r11 , 1) 9588c2ecf20Sopenharmony_ci movdqu \XMM3, 16*2(%arg3 , %r11 , 1) 9598c2ecf20Sopenharmony_ci movdqu \XMM4, 16*3(%arg3 , %r11 , 1) 9608c2ecf20Sopenharmony_ci.endif 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci add $64, %r11 9638c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM1 # perform a 16 byte swap 9648c2ecf20Sopenharmony_ci pxor \XMMDst, \XMM1 9658c2ecf20Sopenharmony_ci# combine GHASHed value with the corresponding ciphertext 9668c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM2 # perform a 16 byte swap 9678c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM3 # perform a 16 byte swap 9688c2ecf20Sopenharmony_ci pshufb %xmm14, \XMM4 # perform a 16 byte swap 9698c2ecf20Sopenharmony_ci 9708c2ecf20Sopenharmony_ci_initial_blocks_done\@: 9718c2ecf20Sopenharmony_ci 9728c2ecf20Sopenharmony_ci.endm 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_ci/* 9758c2ecf20Sopenharmony_ci* encrypt 4 blocks at a time 9768c2ecf20Sopenharmony_ci* ghash the 4 previously encrypted ciphertext blocks 9778c2ecf20Sopenharmony_ci* arg1, %arg3, %arg4 are used as pointers only, not modified 9788c2ecf20Sopenharmony_ci* %r11 is the data offset value 9798c2ecf20Sopenharmony_ci*/ 9808c2ecf20Sopenharmony_ci.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \ 9818c2ecf20Sopenharmony_ciTMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation 9828c2ecf20Sopenharmony_ci 9838c2ecf20Sopenharmony_ci movdqa \XMM1, \XMM5 9848c2ecf20Sopenharmony_ci movdqa \XMM2, \XMM6 9858c2ecf20Sopenharmony_ci movdqa \XMM3, \XMM7 9868c2ecf20Sopenharmony_ci movdqa \XMM4, \XMM8 9878c2ecf20Sopenharmony_ci 9888c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm15 9898c2ecf20Sopenharmony_ci # multiply TMP5 * HashKey using karatsuba 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP4 9928c2ecf20Sopenharmony_ci pshufd $78, \XMM5, \TMP6 9938c2ecf20Sopenharmony_ci pxor \XMM5, \TMP6 9948c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 9958c2ecf20Sopenharmony_ci movdqu HashKey_4(%arg2), \TMP5 9968c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 9978c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM1 9988c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 9998c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM2 10008c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 10018c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM3 10028c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 10038c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM4 10048c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM1 # perform a 16 byte swap 10058c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 10068c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM2 # perform a 16 byte swap 10078c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM3 # perform a 16 byte swap 10088c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM4 # perform a 16 byte swap 10098c2ecf20Sopenharmony_ci 10108c2ecf20Sopenharmony_ci pxor (%arg1), \XMM1 10118c2ecf20Sopenharmony_ci pxor (%arg1), \XMM2 10128c2ecf20Sopenharmony_ci pxor (%arg1), \XMM3 10138c2ecf20Sopenharmony_ci pxor (%arg1), \XMM4 10148c2ecf20Sopenharmony_ci movdqu HashKey_4_k(%arg2), \TMP5 10158c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) 10168c2ecf20Sopenharmony_ci movaps 0x10(%arg1), \TMP1 10178c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM1 # Round 1 10188c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM2 10198c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM3 10208c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM4 10218c2ecf20Sopenharmony_ci movaps 0x20(%arg1), \TMP1 10228c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM1 # Round 2 10238c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM2 10248c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM3 10258c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM4 10268c2ecf20Sopenharmony_ci movdqa \XMM6, \TMP1 10278c2ecf20Sopenharmony_ci pshufd $78, \XMM6, \TMP2 10288c2ecf20Sopenharmony_ci pxor \XMM6, \TMP2 10298c2ecf20Sopenharmony_ci movdqu HashKey_3(%arg2), \TMP5 10308c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 10318c2ecf20Sopenharmony_ci movaps 0x30(%arg1), \TMP3 10328c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 3 10338c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 10348c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 10358c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 10368c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 10378c2ecf20Sopenharmony_ci movaps 0x40(%arg1), \TMP3 10388c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 4 10398c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 10408c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 10418c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 10428c2ecf20Sopenharmony_ci movdqu HashKey_3_k(%arg2), \TMP5 10438c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 10448c2ecf20Sopenharmony_ci movaps 0x50(%arg1), \TMP3 10458c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 5 10468c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 10478c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 10488c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 10498c2ecf20Sopenharmony_ci pxor \TMP1, \TMP4 10508c2ecf20Sopenharmony_ci# accumulate the results in TMP4:XMM5, TMP6 holds the middle part 10518c2ecf20Sopenharmony_ci pxor \XMM6, \XMM5 10528c2ecf20Sopenharmony_ci pxor \TMP2, \TMP6 10538c2ecf20Sopenharmony_ci movdqa \XMM7, \TMP1 10548c2ecf20Sopenharmony_ci pshufd $78, \XMM7, \TMP2 10558c2ecf20Sopenharmony_ci pxor \XMM7, \TMP2 10568c2ecf20Sopenharmony_ci movdqu HashKey_2(%arg2), \TMP5 10578c2ecf20Sopenharmony_ci 10588c2ecf20Sopenharmony_ci # Multiply TMP5 * HashKey using karatsuba 10598c2ecf20Sopenharmony_ci 10608c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 10618c2ecf20Sopenharmony_ci movaps 0x60(%arg1), \TMP3 10628c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 6 10638c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 10648c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 10658c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 10668c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 10678c2ecf20Sopenharmony_ci movaps 0x70(%arg1), \TMP3 10688c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 7 10698c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 10708c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 10718c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 10728c2ecf20Sopenharmony_ci movdqu HashKey_2_k(%arg2), \TMP5 10738c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 10748c2ecf20Sopenharmony_ci movaps 0x80(%arg1), \TMP3 10758c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 8 10768c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 10778c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 10788c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 10798c2ecf20Sopenharmony_ci pxor \TMP1, \TMP4 10808c2ecf20Sopenharmony_ci# accumulate the results in TMP4:XMM5, TMP6 holds the middle part 10818c2ecf20Sopenharmony_ci pxor \XMM7, \XMM5 10828c2ecf20Sopenharmony_ci pxor \TMP2, \TMP6 10838c2ecf20Sopenharmony_ci 10848c2ecf20Sopenharmony_ci # Multiply XMM8 * HashKey 10858c2ecf20Sopenharmony_ci # XMM8 and TMP5 hold the values for the two operands 10868c2ecf20Sopenharmony_ci 10878c2ecf20Sopenharmony_ci movdqa \XMM8, \TMP1 10888c2ecf20Sopenharmony_ci pshufd $78, \XMM8, \TMP2 10898c2ecf20Sopenharmony_ci pxor \XMM8, \TMP2 10908c2ecf20Sopenharmony_ci movdqu HashKey(%arg2), \TMP5 10918c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 10928c2ecf20Sopenharmony_ci movaps 0x90(%arg1), \TMP3 10938c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 9 10948c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 10958c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 10968c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 10978c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 10988c2ecf20Sopenharmony_ci lea 0xa0(%arg1),%r10 10998c2ecf20Sopenharmony_ci mov keysize,%eax 11008c2ecf20Sopenharmony_ci shr $2,%eax # 128->4, 192->6, 256->8 11018c2ecf20Sopenharmony_ci sub $4,%eax # 128->0, 192->2, 256->4 11028c2ecf20Sopenharmony_ci jz aes_loop_par_enc_done\@ 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_ciaes_loop_par_enc\@: 11058c2ecf20Sopenharmony_ci MOVADQ (%r10),\TMP3 11068c2ecf20Sopenharmony_ci.irpc index, 1234 11078c2ecf20Sopenharmony_ci aesenc \TMP3, %xmm\index 11088c2ecf20Sopenharmony_ci.endr 11098c2ecf20Sopenharmony_ci add $16,%r10 11108c2ecf20Sopenharmony_ci sub $1,%eax 11118c2ecf20Sopenharmony_ci jnz aes_loop_par_enc\@ 11128c2ecf20Sopenharmony_ci 11138c2ecf20Sopenharmony_ciaes_loop_par_enc_done\@: 11148c2ecf20Sopenharmony_ci MOVADQ (%r10), \TMP3 11158c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM1 # Round 10 11168c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM2 11178c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM3 11188c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM4 11198c2ecf20Sopenharmony_ci movdqu HashKey_k(%arg2), \TMP5 11208c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 11218c2ecf20Sopenharmony_ci movdqu (%arg4,%r11,1), \TMP3 11228c2ecf20Sopenharmony_ci pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK 11238c2ecf20Sopenharmony_ci movdqu 16(%arg4,%r11,1), \TMP3 11248c2ecf20Sopenharmony_ci pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK 11258c2ecf20Sopenharmony_ci movdqu 32(%arg4,%r11,1), \TMP3 11268c2ecf20Sopenharmony_ci pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK 11278c2ecf20Sopenharmony_ci movdqu 48(%arg4,%r11,1), \TMP3 11288c2ecf20Sopenharmony_ci pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK 11298c2ecf20Sopenharmony_ci movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer 11308c2ecf20Sopenharmony_ci movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer 11318c2ecf20Sopenharmony_ci movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer 11328c2ecf20Sopenharmony_ci movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer 11338c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM1 # perform a 16 byte swap 11348c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM2 # perform a 16 byte swap 11358c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM3 # perform a 16 byte swap 11368c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM4 # perform a 16 byte swap 11378c2ecf20Sopenharmony_ci 11388c2ecf20Sopenharmony_ci pxor \TMP4, \TMP1 11398c2ecf20Sopenharmony_ci pxor \XMM8, \XMM5 11408c2ecf20Sopenharmony_ci pxor \TMP6, \TMP2 11418c2ecf20Sopenharmony_ci pxor \TMP1, \TMP2 11428c2ecf20Sopenharmony_ci pxor \XMM5, \TMP2 11438c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP3 11448c2ecf20Sopenharmony_ci pslldq $8, \TMP3 # left shift TMP3 2 DWs 11458c2ecf20Sopenharmony_ci psrldq $8, \TMP2 # right shift TMP2 2 DWs 11468c2ecf20Sopenharmony_ci pxor \TMP3, \XMM5 11478c2ecf20Sopenharmony_ci pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ci # first phase of reduction 11508c2ecf20Sopenharmony_ci 11518c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP2 11528c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP3 11538c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP4 11548c2ecf20Sopenharmony_ci# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently 11558c2ecf20Sopenharmony_ci pslld $31, \TMP2 # packed right shift << 31 11568c2ecf20Sopenharmony_ci pslld $30, \TMP3 # packed right shift << 30 11578c2ecf20Sopenharmony_ci pslld $25, \TMP4 # packed right shift << 25 11588c2ecf20Sopenharmony_ci pxor \TMP3, \TMP2 # xor the shifted versions 11598c2ecf20Sopenharmony_ci pxor \TMP4, \TMP2 11608c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP5 11618c2ecf20Sopenharmony_ci psrldq $4, \TMP5 # right shift T5 1 DW 11628c2ecf20Sopenharmony_ci pslldq $12, \TMP2 # left shift T2 3 DWs 11638c2ecf20Sopenharmony_ci pxor \TMP2, \XMM5 11648c2ecf20Sopenharmony_ci 11658c2ecf20Sopenharmony_ci # second phase of reduction 11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 11688c2ecf20Sopenharmony_ci movdqa \XMM5,\TMP3 11698c2ecf20Sopenharmony_ci movdqa \XMM5,\TMP4 11708c2ecf20Sopenharmony_ci psrld $1, \TMP2 # packed left shift >>1 11718c2ecf20Sopenharmony_ci psrld $2, \TMP3 # packed left shift >>2 11728c2ecf20Sopenharmony_ci psrld $7, \TMP4 # packed left shift >>7 11738c2ecf20Sopenharmony_ci pxor \TMP3,\TMP2 # xor the shifted versions 11748c2ecf20Sopenharmony_ci pxor \TMP4,\TMP2 11758c2ecf20Sopenharmony_ci pxor \TMP5, \TMP2 11768c2ecf20Sopenharmony_ci pxor \TMP2, \XMM5 11778c2ecf20Sopenharmony_ci pxor \TMP1, \XMM5 # result is in TMP1 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci pxor \XMM5, \XMM1 11808c2ecf20Sopenharmony_ci.endm 11818c2ecf20Sopenharmony_ci 11828c2ecf20Sopenharmony_ci/* 11838c2ecf20Sopenharmony_ci* decrypt 4 blocks at a time 11848c2ecf20Sopenharmony_ci* ghash the 4 previously decrypted ciphertext blocks 11858c2ecf20Sopenharmony_ci* arg1, %arg3, %arg4 are used as pointers only, not modified 11868c2ecf20Sopenharmony_ci* %r11 is the data offset value 11878c2ecf20Sopenharmony_ci*/ 11888c2ecf20Sopenharmony_ci.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \ 11898c2ecf20Sopenharmony_ciTMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci movdqa \XMM1, \XMM5 11928c2ecf20Sopenharmony_ci movdqa \XMM2, \XMM6 11938c2ecf20Sopenharmony_ci movdqa \XMM3, \XMM7 11948c2ecf20Sopenharmony_ci movdqa \XMM4, \XMM8 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_ci movdqa SHUF_MASK(%rip), %xmm15 11978c2ecf20Sopenharmony_ci # multiply TMP5 * HashKey using karatsuba 11988c2ecf20Sopenharmony_ci 11998c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP4 12008c2ecf20Sopenharmony_ci pshufd $78, \XMM5, \TMP6 12018c2ecf20Sopenharmony_ci pxor \XMM5, \TMP6 12028c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 12038c2ecf20Sopenharmony_ci movdqu HashKey_4(%arg2), \TMP5 12048c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 12058c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM1 12068c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 12078c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM2 12088c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 12098c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM3 12108c2ecf20Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 12118c2ecf20Sopenharmony_ci movdqa \XMM0, \XMM4 12128c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM1 # perform a 16 byte swap 12138c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 12148c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM2 # perform a 16 byte swap 12158c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM3 # perform a 16 byte swap 12168c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM4 # perform a 16 byte swap 12178c2ecf20Sopenharmony_ci 12188c2ecf20Sopenharmony_ci pxor (%arg1), \XMM1 12198c2ecf20Sopenharmony_ci pxor (%arg1), \XMM2 12208c2ecf20Sopenharmony_ci pxor (%arg1), \XMM3 12218c2ecf20Sopenharmony_ci pxor (%arg1), \XMM4 12228c2ecf20Sopenharmony_ci movdqu HashKey_4_k(%arg2), \TMP5 12238c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) 12248c2ecf20Sopenharmony_ci movaps 0x10(%arg1), \TMP1 12258c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM1 # Round 1 12268c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM2 12278c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM3 12288c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM4 12298c2ecf20Sopenharmony_ci movaps 0x20(%arg1), \TMP1 12308c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM1 # Round 2 12318c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM2 12328c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM3 12338c2ecf20Sopenharmony_ci aesenc \TMP1, \XMM4 12348c2ecf20Sopenharmony_ci movdqa \XMM6, \TMP1 12358c2ecf20Sopenharmony_ci pshufd $78, \XMM6, \TMP2 12368c2ecf20Sopenharmony_ci pxor \XMM6, \TMP2 12378c2ecf20Sopenharmony_ci movdqu HashKey_3(%arg2), \TMP5 12388c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 12398c2ecf20Sopenharmony_ci movaps 0x30(%arg1), \TMP3 12408c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 3 12418c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 12428c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 12438c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 12448c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 12458c2ecf20Sopenharmony_ci movaps 0x40(%arg1), \TMP3 12468c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 4 12478c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 12488c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 12498c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 12508c2ecf20Sopenharmony_ci movdqu HashKey_3_k(%arg2), \TMP5 12518c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 12528c2ecf20Sopenharmony_ci movaps 0x50(%arg1), \TMP3 12538c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 5 12548c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 12558c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 12568c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 12578c2ecf20Sopenharmony_ci pxor \TMP1, \TMP4 12588c2ecf20Sopenharmony_ci# accumulate the results in TMP4:XMM5, TMP6 holds the middle part 12598c2ecf20Sopenharmony_ci pxor \XMM6, \XMM5 12608c2ecf20Sopenharmony_ci pxor \TMP2, \TMP6 12618c2ecf20Sopenharmony_ci movdqa \XMM7, \TMP1 12628c2ecf20Sopenharmony_ci pshufd $78, \XMM7, \TMP2 12638c2ecf20Sopenharmony_ci pxor \XMM7, \TMP2 12648c2ecf20Sopenharmony_ci movdqu HashKey_2(%arg2), \TMP5 12658c2ecf20Sopenharmony_ci 12668c2ecf20Sopenharmony_ci # Multiply TMP5 * HashKey using karatsuba 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 12698c2ecf20Sopenharmony_ci movaps 0x60(%arg1), \TMP3 12708c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 6 12718c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 12728c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 12738c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 12748c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 12758c2ecf20Sopenharmony_ci movaps 0x70(%arg1), \TMP3 12768c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 7 12778c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 12788c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 12798c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 12808c2ecf20Sopenharmony_ci movdqu HashKey_2_k(%arg2), \TMP5 12818c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 12828c2ecf20Sopenharmony_ci movaps 0x80(%arg1), \TMP3 12838c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 8 12848c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 12858c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 12868c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 12878c2ecf20Sopenharmony_ci pxor \TMP1, \TMP4 12888c2ecf20Sopenharmony_ci# accumulate the results in TMP4:XMM5, TMP6 holds the middle part 12898c2ecf20Sopenharmony_ci pxor \XMM7, \XMM5 12908c2ecf20Sopenharmony_ci pxor \TMP2, \TMP6 12918c2ecf20Sopenharmony_ci 12928c2ecf20Sopenharmony_ci # Multiply XMM8 * HashKey 12938c2ecf20Sopenharmony_ci # XMM8 and TMP5 hold the values for the two operands 12948c2ecf20Sopenharmony_ci 12958c2ecf20Sopenharmony_ci movdqa \XMM8, \TMP1 12968c2ecf20Sopenharmony_ci pshufd $78, \XMM8, \TMP2 12978c2ecf20Sopenharmony_ci pxor \XMM8, \TMP2 12988c2ecf20Sopenharmony_ci movdqu HashKey(%arg2), \TMP5 12998c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 13008c2ecf20Sopenharmony_ci movaps 0x90(%arg1), \TMP3 13018c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 9 13028c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM2 13038c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM3 13048c2ecf20Sopenharmony_ci aesenc \TMP3, \XMM4 13058c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 13068c2ecf20Sopenharmony_ci lea 0xa0(%arg1),%r10 13078c2ecf20Sopenharmony_ci mov keysize,%eax 13088c2ecf20Sopenharmony_ci shr $2,%eax # 128->4, 192->6, 256->8 13098c2ecf20Sopenharmony_ci sub $4,%eax # 128->0, 192->2, 256->4 13108c2ecf20Sopenharmony_ci jz aes_loop_par_dec_done\@ 13118c2ecf20Sopenharmony_ci 13128c2ecf20Sopenharmony_ciaes_loop_par_dec\@: 13138c2ecf20Sopenharmony_ci MOVADQ (%r10),\TMP3 13148c2ecf20Sopenharmony_ci.irpc index, 1234 13158c2ecf20Sopenharmony_ci aesenc \TMP3, %xmm\index 13168c2ecf20Sopenharmony_ci.endr 13178c2ecf20Sopenharmony_ci add $16,%r10 13188c2ecf20Sopenharmony_ci sub $1,%eax 13198c2ecf20Sopenharmony_ci jnz aes_loop_par_dec\@ 13208c2ecf20Sopenharmony_ci 13218c2ecf20Sopenharmony_ciaes_loop_par_dec_done\@: 13228c2ecf20Sopenharmony_ci MOVADQ (%r10), \TMP3 13238c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM1 # last round 13248c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM2 13258c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM3 13268c2ecf20Sopenharmony_ci aesenclast \TMP3, \XMM4 13278c2ecf20Sopenharmony_ci movdqu HashKey_k(%arg2), \TMP5 13288c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 13298c2ecf20Sopenharmony_ci movdqu (%arg4,%r11,1), \TMP3 13308c2ecf20Sopenharmony_ci pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK 13318c2ecf20Sopenharmony_ci movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer 13328c2ecf20Sopenharmony_ci movdqa \TMP3, \XMM1 13338c2ecf20Sopenharmony_ci movdqu 16(%arg4,%r11,1), \TMP3 13348c2ecf20Sopenharmony_ci pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK 13358c2ecf20Sopenharmony_ci movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer 13368c2ecf20Sopenharmony_ci movdqa \TMP3, \XMM2 13378c2ecf20Sopenharmony_ci movdqu 32(%arg4,%r11,1), \TMP3 13388c2ecf20Sopenharmony_ci pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK 13398c2ecf20Sopenharmony_ci movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer 13408c2ecf20Sopenharmony_ci movdqa \TMP3, \XMM3 13418c2ecf20Sopenharmony_ci movdqu 48(%arg4,%r11,1), \TMP3 13428c2ecf20Sopenharmony_ci pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK 13438c2ecf20Sopenharmony_ci movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer 13448c2ecf20Sopenharmony_ci movdqa \TMP3, \XMM4 13458c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM1 # perform a 16 byte swap 13468c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM2 # perform a 16 byte swap 13478c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM3 # perform a 16 byte swap 13488c2ecf20Sopenharmony_ci pshufb %xmm15, \XMM4 # perform a 16 byte swap 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci pxor \TMP4, \TMP1 13518c2ecf20Sopenharmony_ci pxor \XMM8, \XMM5 13528c2ecf20Sopenharmony_ci pxor \TMP6, \TMP2 13538c2ecf20Sopenharmony_ci pxor \TMP1, \TMP2 13548c2ecf20Sopenharmony_ci pxor \XMM5, \TMP2 13558c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP3 13568c2ecf20Sopenharmony_ci pslldq $8, \TMP3 # left shift TMP3 2 DWs 13578c2ecf20Sopenharmony_ci psrldq $8, \TMP2 # right shift TMP2 2 DWs 13588c2ecf20Sopenharmony_ci pxor \TMP3, \XMM5 13598c2ecf20Sopenharmony_ci pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci # first phase of reduction 13628c2ecf20Sopenharmony_ci 13638c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP2 13648c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP3 13658c2ecf20Sopenharmony_ci movdqa \XMM5, \TMP4 13668c2ecf20Sopenharmony_ci# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently 13678c2ecf20Sopenharmony_ci pslld $31, \TMP2 # packed right shift << 31 13688c2ecf20Sopenharmony_ci pslld $30, \TMP3 # packed right shift << 30 13698c2ecf20Sopenharmony_ci pslld $25, \TMP4 # packed right shift << 25 13708c2ecf20Sopenharmony_ci pxor \TMP3, \TMP2 # xor the shifted versions 13718c2ecf20Sopenharmony_ci pxor \TMP4, \TMP2 13728c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP5 13738c2ecf20Sopenharmony_ci psrldq $4, \TMP5 # right shift T5 1 DW 13748c2ecf20Sopenharmony_ci pslldq $12, \TMP2 # left shift T2 3 DWs 13758c2ecf20Sopenharmony_ci pxor \TMP2, \XMM5 13768c2ecf20Sopenharmony_ci 13778c2ecf20Sopenharmony_ci # second phase of reduction 13788c2ecf20Sopenharmony_ci 13798c2ecf20Sopenharmony_ci movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 13808c2ecf20Sopenharmony_ci movdqa \XMM5,\TMP3 13818c2ecf20Sopenharmony_ci movdqa \XMM5,\TMP4 13828c2ecf20Sopenharmony_ci psrld $1, \TMP2 # packed left shift >>1 13838c2ecf20Sopenharmony_ci psrld $2, \TMP3 # packed left shift >>2 13848c2ecf20Sopenharmony_ci psrld $7, \TMP4 # packed left shift >>7 13858c2ecf20Sopenharmony_ci pxor \TMP3,\TMP2 # xor the shifted versions 13868c2ecf20Sopenharmony_ci pxor \TMP4,\TMP2 13878c2ecf20Sopenharmony_ci pxor \TMP5, \TMP2 13888c2ecf20Sopenharmony_ci pxor \TMP2, \XMM5 13898c2ecf20Sopenharmony_ci pxor \TMP1, \XMM5 # result is in TMP1 13908c2ecf20Sopenharmony_ci 13918c2ecf20Sopenharmony_ci pxor \XMM5, \XMM1 13928c2ecf20Sopenharmony_ci.endm 13938c2ecf20Sopenharmony_ci 13948c2ecf20Sopenharmony_ci/* GHASH the last 4 ciphertext blocks. */ 13958c2ecf20Sopenharmony_ci.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ 13968c2ecf20Sopenharmony_ciTMP7 XMM1 XMM2 XMM3 XMM4 XMMDst 13978c2ecf20Sopenharmony_ci 13988c2ecf20Sopenharmony_ci # Multiply TMP6 * HashKey (using Karatsuba) 13998c2ecf20Sopenharmony_ci 14008c2ecf20Sopenharmony_ci movdqa \XMM1, \TMP6 14018c2ecf20Sopenharmony_ci pshufd $78, \XMM1, \TMP2 14028c2ecf20Sopenharmony_ci pxor \XMM1, \TMP2 14038c2ecf20Sopenharmony_ci movdqu HashKey_4(%arg2), \TMP5 14048c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1 14058c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0 14068c2ecf20Sopenharmony_ci movdqu HashKey_4_k(%arg2), \TMP4 14078c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 14088c2ecf20Sopenharmony_ci movdqa \XMM1, \XMMDst 14098c2ecf20Sopenharmony_ci movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 14108c2ecf20Sopenharmony_ci 14118c2ecf20Sopenharmony_ci # Multiply TMP1 * HashKey (using Karatsuba) 14128c2ecf20Sopenharmony_ci 14138c2ecf20Sopenharmony_ci movdqa \XMM2, \TMP1 14148c2ecf20Sopenharmony_ci pshufd $78, \XMM2, \TMP2 14158c2ecf20Sopenharmony_ci pxor \XMM2, \TMP2 14168c2ecf20Sopenharmony_ci movdqu HashKey_3(%arg2), \TMP5 14178c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 14188c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0 14198c2ecf20Sopenharmony_ci movdqu HashKey_3_k(%arg2), \TMP4 14208c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 14218c2ecf20Sopenharmony_ci pxor \TMP1, \TMP6 14228c2ecf20Sopenharmony_ci pxor \XMM2, \XMMDst 14238c2ecf20Sopenharmony_ci pxor \TMP2, \XMM1 14248c2ecf20Sopenharmony_ci# results accumulated in TMP6, XMMDst, XMM1 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci # Multiply TMP1 * HashKey (using Karatsuba) 14278c2ecf20Sopenharmony_ci 14288c2ecf20Sopenharmony_ci movdqa \XMM3, \TMP1 14298c2ecf20Sopenharmony_ci pshufd $78, \XMM3, \TMP2 14308c2ecf20Sopenharmony_ci pxor \XMM3, \TMP2 14318c2ecf20Sopenharmony_ci movdqu HashKey_2(%arg2), \TMP5 14328c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 14338c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0 14348c2ecf20Sopenharmony_ci movdqu HashKey_2_k(%arg2), \TMP4 14358c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 14368c2ecf20Sopenharmony_ci pxor \TMP1, \TMP6 14378c2ecf20Sopenharmony_ci pxor \XMM3, \XMMDst 14388c2ecf20Sopenharmony_ci pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 14398c2ecf20Sopenharmony_ci 14408c2ecf20Sopenharmony_ci # Multiply TMP1 * HashKey (using Karatsuba) 14418c2ecf20Sopenharmony_ci movdqa \XMM4, \TMP1 14428c2ecf20Sopenharmony_ci pshufd $78, \XMM4, \TMP2 14438c2ecf20Sopenharmony_ci pxor \XMM4, \TMP2 14448c2ecf20Sopenharmony_ci movdqu HashKey(%arg2), \TMP5 14458c2ecf20Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 14468c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0 14478c2ecf20Sopenharmony_ci movdqu HashKey_k(%arg2), \TMP4 14488c2ecf20Sopenharmony_ci pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 14498c2ecf20Sopenharmony_ci pxor \TMP1, \TMP6 14508c2ecf20Sopenharmony_ci pxor \XMM4, \XMMDst 14518c2ecf20Sopenharmony_ci pxor \XMM1, \TMP2 14528c2ecf20Sopenharmony_ci pxor \TMP6, \TMP2 14538c2ecf20Sopenharmony_ci pxor \XMMDst, \TMP2 14548c2ecf20Sopenharmony_ci # middle section of the temp results combined as in karatsuba algorithm 14558c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP4 14568c2ecf20Sopenharmony_ci pslldq $8, \TMP4 # left shift TMP4 2 DWs 14578c2ecf20Sopenharmony_ci psrldq $8, \TMP2 # right shift TMP2 2 DWs 14588c2ecf20Sopenharmony_ci pxor \TMP4, \XMMDst 14598c2ecf20Sopenharmony_ci pxor \TMP2, \TMP6 14608c2ecf20Sopenharmony_ci# TMP6:XMMDst holds the result of the accumulated carry-less multiplications 14618c2ecf20Sopenharmony_ci # first phase of the reduction 14628c2ecf20Sopenharmony_ci movdqa \XMMDst, \TMP2 14638c2ecf20Sopenharmony_ci movdqa \XMMDst, \TMP3 14648c2ecf20Sopenharmony_ci movdqa \XMMDst, \TMP4 14658c2ecf20Sopenharmony_ci# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently 14668c2ecf20Sopenharmony_ci pslld $31, \TMP2 # packed right shifting << 31 14678c2ecf20Sopenharmony_ci pslld $30, \TMP3 # packed right shifting << 30 14688c2ecf20Sopenharmony_ci pslld $25, \TMP4 # packed right shifting << 25 14698c2ecf20Sopenharmony_ci pxor \TMP3, \TMP2 # xor the shifted versions 14708c2ecf20Sopenharmony_ci pxor \TMP4, \TMP2 14718c2ecf20Sopenharmony_ci movdqa \TMP2, \TMP7 14728c2ecf20Sopenharmony_ci psrldq $4, \TMP7 # right shift TMP7 1 DW 14738c2ecf20Sopenharmony_ci pslldq $12, \TMP2 # left shift TMP2 3 DWs 14748c2ecf20Sopenharmony_ci pxor \TMP2, \XMMDst 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_ci # second phase of the reduction 14778c2ecf20Sopenharmony_ci movdqa \XMMDst, \TMP2 14788c2ecf20Sopenharmony_ci # make 3 copies of XMMDst for doing 3 shift operations 14798c2ecf20Sopenharmony_ci movdqa \XMMDst, \TMP3 14808c2ecf20Sopenharmony_ci movdqa \XMMDst, \TMP4 14818c2ecf20Sopenharmony_ci psrld $1, \TMP2 # packed left shift >> 1 14828c2ecf20Sopenharmony_ci psrld $2, \TMP3 # packed left shift >> 2 14838c2ecf20Sopenharmony_ci psrld $7, \TMP4 # packed left shift >> 7 14848c2ecf20Sopenharmony_ci pxor \TMP3, \TMP2 # xor the shifted versions 14858c2ecf20Sopenharmony_ci pxor \TMP4, \TMP2 14868c2ecf20Sopenharmony_ci pxor \TMP7, \TMP2 14878c2ecf20Sopenharmony_ci pxor \TMP2, \XMMDst 14888c2ecf20Sopenharmony_ci pxor \TMP6, \XMMDst # reduced result is in XMMDst 14898c2ecf20Sopenharmony_ci.endm 14908c2ecf20Sopenharmony_ci 14918c2ecf20Sopenharmony_ci 14928c2ecf20Sopenharmony_ci/* Encryption of a single block 14938c2ecf20Sopenharmony_ci* uses eax & r10 14948c2ecf20Sopenharmony_ci*/ 14958c2ecf20Sopenharmony_ci 14968c2ecf20Sopenharmony_ci.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_ci pxor (%arg1), \XMM0 14998c2ecf20Sopenharmony_ci mov keysize,%eax 15008c2ecf20Sopenharmony_ci shr $2,%eax # 128->4, 192->6, 256->8 15018c2ecf20Sopenharmony_ci add $5,%eax # 128->9, 192->11, 256->13 15028c2ecf20Sopenharmony_ci lea 16(%arg1), %r10 # get first expanded key address 15038c2ecf20Sopenharmony_ci 15048c2ecf20Sopenharmony_ci_esb_loop_\@: 15058c2ecf20Sopenharmony_ci MOVADQ (%r10),\TMP1 15068c2ecf20Sopenharmony_ci aesenc \TMP1,\XMM0 15078c2ecf20Sopenharmony_ci add $16,%r10 15088c2ecf20Sopenharmony_ci sub $1,%eax 15098c2ecf20Sopenharmony_ci jnz _esb_loop_\@ 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci MOVADQ (%r10),\TMP1 15128c2ecf20Sopenharmony_ci aesenclast \TMP1,\XMM0 15138c2ecf20Sopenharmony_ci.endm 15148c2ecf20Sopenharmony_ci/***************************************************************************** 15158c2ecf20Sopenharmony_ci* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. 15168c2ecf20Sopenharmony_ci* struct gcm_context_data *data 15178c2ecf20Sopenharmony_ci* // Context data 15188c2ecf20Sopenharmony_ci* u8 *out, // Plaintext output. Encrypt in-place is allowed. 15198c2ecf20Sopenharmony_ci* const u8 *in, // Ciphertext input 15208c2ecf20Sopenharmony_ci* u64 plaintext_len, // Length of data in bytes for decryption. 15218c2ecf20Sopenharmony_ci* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) 15228c2ecf20Sopenharmony_ci* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) 15238c2ecf20Sopenharmony_ci* // concatenated with 0x00000001. 16-byte aligned pointer. 15248c2ecf20Sopenharmony_ci* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. 15258c2ecf20Sopenharmony_ci* const u8 *aad, // Additional Authentication Data (AAD) 15268c2ecf20Sopenharmony_ci* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes 15278c2ecf20Sopenharmony_ci* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the 15288c2ecf20Sopenharmony_ci* // given authentication tag and only return the plaintext if they match. 15298c2ecf20Sopenharmony_ci* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 15308c2ecf20Sopenharmony_ci* // (most likely), 12 or 8. 15318c2ecf20Sopenharmony_ci* 15328c2ecf20Sopenharmony_ci* Assumptions: 15338c2ecf20Sopenharmony_ci* 15348c2ecf20Sopenharmony_ci* keys: 15358c2ecf20Sopenharmony_ci* keys are pre-expanded and aligned to 16 bytes. we are using the first 15368c2ecf20Sopenharmony_ci* set of 11 keys in the data structure void *aes_ctx 15378c2ecf20Sopenharmony_ci* 15388c2ecf20Sopenharmony_ci* iv: 15398c2ecf20Sopenharmony_ci* 0 1 2 3 15408c2ecf20Sopenharmony_ci* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 15418c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15428c2ecf20Sopenharmony_ci* | Salt (From the SA) | 15438c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15448c2ecf20Sopenharmony_ci* | Initialization Vector | 15458c2ecf20Sopenharmony_ci* | (This is the sequence number from IPSec header) | 15468c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15478c2ecf20Sopenharmony_ci* | 0x1 | 15488c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15498c2ecf20Sopenharmony_ci* 15508c2ecf20Sopenharmony_ci* 15518c2ecf20Sopenharmony_ci* 15528c2ecf20Sopenharmony_ci* AAD: 15538c2ecf20Sopenharmony_ci* AAD padded to 128 bits with 0 15548c2ecf20Sopenharmony_ci* for example, assume AAD is a u32 vector 15558c2ecf20Sopenharmony_ci* 15568c2ecf20Sopenharmony_ci* if AAD is 8 bytes: 15578c2ecf20Sopenharmony_ci* AAD[3] = {A0, A1}; 15588c2ecf20Sopenharmony_ci* padded AAD in xmm register = {A1 A0 0 0} 15598c2ecf20Sopenharmony_ci* 15608c2ecf20Sopenharmony_ci* 0 1 2 3 15618c2ecf20Sopenharmony_ci* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 15628c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15638c2ecf20Sopenharmony_ci* | SPI (A1) | 15648c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15658c2ecf20Sopenharmony_ci* | 32-bit Sequence Number (A0) | 15668c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15678c2ecf20Sopenharmony_ci* | 0x0 | 15688c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15698c2ecf20Sopenharmony_ci* 15708c2ecf20Sopenharmony_ci* AAD Format with 32-bit Sequence Number 15718c2ecf20Sopenharmony_ci* 15728c2ecf20Sopenharmony_ci* if AAD is 12 bytes: 15738c2ecf20Sopenharmony_ci* AAD[3] = {A0, A1, A2}; 15748c2ecf20Sopenharmony_ci* padded AAD in xmm register = {A2 A1 A0 0} 15758c2ecf20Sopenharmony_ci* 15768c2ecf20Sopenharmony_ci* 0 1 2 3 15778c2ecf20Sopenharmony_ci* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 15788c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15798c2ecf20Sopenharmony_ci* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 15808c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15818c2ecf20Sopenharmony_ci* | SPI (A2) | 15828c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15838c2ecf20Sopenharmony_ci* | 64-bit Extended Sequence Number {A1,A0} | 15848c2ecf20Sopenharmony_ci* | | 15858c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15868c2ecf20Sopenharmony_ci* | 0x0 | 15878c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 15888c2ecf20Sopenharmony_ci* 15898c2ecf20Sopenharmony_ci* AAD Format with 64-bit Extended Sequence Number 15908c2ecf20Sopenharmony_ci* 15918c2ecf20Sopenharmony_ci* poly = x^128 + x^127 + x^126 + x^121 + 1 15928c2ecf20Sopenharmony_ci* 15938c2ecf20Sopenharmony_ci*****************************************************************************/ 15948c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_gcm_dec) 15958c2ecf20Sopenharmony_ci FUNC_SAVE 15968c2ecf20Sopenharmony_ci 15978c2ecf20Sopenharmony_ci GCM_INIT %arg6, arg7, arg8, arg9 15988c2ecf20Sopenharmony_ci GCM_ENC_DEC dec 15998c2ecf20Sopenharmony_ci GCM_COMPLETE arg10, arg11 16008c2ecf20Sopenharmony_ci FUNC_RESTORE 16018c2ecf20Sopenharmony_ci RET 16028c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_gcm_dec) 16038c2ecf20Sopenharmony_ci 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci/***************************************************************************** 16068c2ecf20Sopenharmony_ci* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. 16078c2ecf20Sopenharmony_ci* struct gcm_context_data *data 16088c2ecf20Sopenharmony_ci* // Context data 16098c2ecf20Sopenharmony_ci* u8 *out, // Ciphertext output. Encrypt in-place is allowed. 16108c2ecf20Sopenharmony_ci* const u8 *in, // Plaintext input 16118c2ecf20Sopenharmony_ci* u64 plaintext_len, // Length of data in bytes for encryption. 16128c2ecf20Sopenharmony_ci* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) 16138c2ecf20Sopenharmony_ci* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) 16148c2ecf20Sopenharmony_ci* // concatenated with 0x00000001. 16-byte aligned pointer. 16158c2ecf20Sopenharmony_ci* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. 16168c2ecf20Sopenharmony_ci* const u8 *aad, // Additional Authentication Data (AAD) 16178c2ecf20Sopenharmony_ci* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes 16188c2ecf20Sopenharmony_ci* u8 *auth_tag, // Authenticated Tag output. 16198c2ecf20Sopenharmony_ci* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), 16208c2ecf20Sopenharmony_ci* // 12 or 8. 16218c2ecf20Sopenharmony_ci* 16228c2ecf20Sopenharmony_ci* Assumptions: 16238c2ecf20Sopenharmony_ci* 16248c2ecf20Sopenharmony_ci* keys: 16258c2ecf20Sopenharmony_ci* keys are pre-expanded and aligned to 16 bytes. we are using the 16268c2ecf20Sopenharmony_ci* first set of 11 keys in the data structure void *aes_ctx 16278c2ecf20Sopenharmony_ci* 16288c2ecf20Sopenharmony_ci* 16298c2ecf20Sopenharmony_ci* iv: 16308c2ecf20Sopenharmony_ci* 0 1 2 3 16318c2ecf20Sopenharmony_ci* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 16328c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16338c2ecf20Sopenharmony_ci* | Salt (From the SA) | 16348c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16358c2ecf20Sopenharmony_ci* | Initialization Vector | 16368c2ecf20Sopenharmony_ci* | (This is the sequence number from IPSec header) | 16378c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16388c2ecf20Sopenharmony_ci* | 0x1 | 16398c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16408c2ecf20Sopenharmony_ci* 16418c2ecf20Sopenharmony_ci* 16428c2ecf20Sopenharmony_ci* 16438c2ecf20Sopenharmony_ci* AAD: 16448c2ecf20Sopenharmony_ci* AAD padded to 128 bits with 0 16458c2ecf20Sopenharmony_ci* for example, assume AAD is a u32 vector 16468c2ecf20Sopenharmony_ci* 16478c2ecf20Sopenharmony_ci* if AAD is 8 bytes: 16488c2ecf20Sopenharmony_ci* AAD[3] = {A0, A1}; 16498c2ecf20Sopenharmony_ci* padded AAD in xmm register = {A1 A0 0 0} 16508c2ecf20Sopenharmony_ci* 16518c2ecf20Sopenharmony_ci* 0 1 2 3 16528c2ecf20Sopenharmony_ci* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 16538c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16548c2ecf20Sopenharmony_ci* | SPI (A1) | 16558c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16568c2ecf20Sopenharmony_ci* | 32-bit Sequence Number (A0) | 16578c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16588c2ecf20Sopenharmony_ci* | 0x0 | 16598c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16608c2ecf20Sopenharmony_ci* 16618c2ecf20Sopenharmony_ci* AAD Format with 32-bit Sequence Number 16628c2ecf20Sopenharmony_ci* 16638c2ecf20Sopenharmony_ci* if AAD is 12 bytes: 16648c2ecf20Sopenharmony_ci* AAD[3] = {A0, A1, A2}; 16658c2ecf20Sopenharmony_ci* padded AAD in xmm register = {A2 A1 A0 0} 16668c2ecf20Sopenharmony_ci* 16678c2ecf20Sopenharmony_ci* 0 1 2 3 16688c2ecf20Sopenharmony_ci* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 16698c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16708c2ecf20Sopenharmony_ci* | SPI (A2) | 16718c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16728c2ecf20Sopenharmony_ci* | 64-bit Extended Sequence Number {A1,A0} | 16738c2ecf20Sopenharmony_ci* | | 16748c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16758c2ecf20Sopenharmony_ci* | 0x0 | 16768c2ecf20Sopenharmony_ci* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16778c2ecf20Sopenharmony_ci* 16788c2ecf20Sopenharmony_ci* AAD Format with 64-bit Extended Sequence Number 16798c2ecf20Sopenharmony_ci* 16808c2ecf20Sopenharmony_ci* poly = x^128 + x^127 + x^126 + x^121 + 1 16818c2ecf20Sopenharmony_ci***************************************************************************/ 16828c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_gcm_enc) 16838c2ecf20Sopenharmony_ci FUNC_SAVE 16848c2ecf20Sopenharmony_ci 16858c2ecf20Sopenharmony_ci GCM_INIT %arg6, arg7, arg8, arg9 16868c2ecf20Sopenharmony_ci GCM_ENC_DEC enc 16878c2ecf20Sopenharmony_ci 16888c2ecf20Sopenharmony_ci GCM_COMPLETE arg10, arg11 16898c2ecf20Sopenharmony_ci FUNC_RESTORE 16908c2ecf20Sopenharmony_ci RET 16918c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_gcm_enc) 16928c2ecf20Sopenharmony_ci 16938c2ecf20Sopenharmony_ci/***************************************************************************** 16948c2ecf20Sopenharmony_ci* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. 16958c2ecf20Sopenharmony_ci* struct gcm_context_data *data, 16968c2ecf20Sopenharmony_ci* // context data 16978c2ecf20Sopenharmony_ci* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) 16988c2ecf20Sopenharmony_ci* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) 16998c2ecf20Sopenharmony_ci* // concatenated with 0x00000001. 16-byte aligned pointer. 17008c2ecf20Sopenharmony_ci* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. 17018c2ecf20Sopenharmony_ci* const u8 *aad, // Additional Authentication Data (AAD) 17028c2ecf20Sopenharmony_ci* u64 aad_len) // Length of AAD in bytes. 17038c2ecf20Sopenharmony_ci*/ 17048c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_gcm_init) 17058c2ecf20Sopenharmony_ci FUNC_SAVE 17068c2ecf20Sopenharmony_ci GCM_INIT %arg3, %arg4,%arg5, %arg6 17078c2ecf20Sopenharmony_ci FUNC_RESTORE 17088c2ecf20Sopenharmony_ci RET 17098c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_gcm_init) 17108c2ecf20Sopenharmony_ci 17118c2ecf20Sopenharmony_ci/***************************************************************************** 17128c2ecf20Sopenharmony_ci* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. 17138c2ecf20Sopenharmony_ci* struct gcm_context_data *data, 17148c2ecf20Sopenharmony_ci* // context data 17158c2ecf20Sopenharmony_ci* u8 *out, // Ciphertext output. Encrypt in-place is allowed. 17168c2ecf20Sopenharmony_ci* const u8 *in, // Plaintext input 17178c2ecf20Sopenharmony_ci* u64 plaintext_len, // Length of data in bytes for encryption. 17188c2ecf20Sopenharmony_ci*/ 17198c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_gcm_enc_update) 17208c2ecf20Sopenharmony_ci FUNC_SAVE 17218c2ecf20Sopenharmony_ci GCM_ENC_DEC enc 17228c2ecf20Sopenharmony_ci FUNC_RESTORE 17238c2ecf20Sopenharmony_ci RET 17248c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_gcm_enc_update) 17258c2ecf20Sopenharmony_ci 17268c2ecf20Sopenharmony_ci/***************************************************************************** 17278c2ecf20Sopenharmony_ci* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. 17288c2ecf20Sopenharmony_ci* struct gcm_context_data *data, 17298c2ecf20Sopenharmony_ci* // context data 17308c2ecf20Sopenharmony_ci* u8 *out, // Ciphertext output. Encrypt in-place is allowed. 17318c2ecf20Sopenharmony_ci* const u8 *in, // Plaintext input 17328c2ecf20Sopenharmony_ci* u64 plaintext_len, // Length of data in bytes for encryption. 17338c2ecf20Sopenharmony_ci*/ 17348c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_gcm_dec_update) 17358c2ecf20Sopenharmony_ci FUNC_SAVE 17368c2ecf20Sopenharmony_ci GCM_ENC_DEC dec 17378c2ecf20Sopenharmony_ci FUNC_RESTORE 17388c2ecf20Sopenharmony_ci RET 17398c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_gcm_dec_update) 17408c2ecf20Sopenharmony_ci 17418c2ecf20Sopenharmony_ci/***************************************************************************** 17428c2ecf20Sopenharmony_ci* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. 17438c2ecf20Sopenharmony_ci* struct gcm_context_data *data, 17448c2ecf20Sopenharmony_ci* // context data 17458c2ecf20Sopenharmony_ci* u8 *auth_tag, // Authenticated Tag output. 17468c2ecf20Sopenharmony_ci* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), 17478c2ecf20Sopenharmony_ci* // 12 or 8. 17488c2ecf20Sopenharmony_ci*/ 17498c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_gcm_finalize) 17508c2ecf20Sopenharmony_ci FUNC_SAVE 17518c2ecf20Sopenharmony_ci GCM_COMPLETE %arg3 %arg4 17528c2ecf20Sopenharmony_ci FUNC_RESTORE 17538c2ecf20Sopenharmony_ci RET 17548c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_gcm_finalize) 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ci#endif 17578c2ecf20Sopenharmony_ci 17588c2ecf20Sopenharmony_ci 17598c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) 17608c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_key_expansion_256a) 17618c2ecf20Sopenharmony_ci pshufd $0b11111111, %xmm1, %xmm1 17628c2ecf20Sopenharmony_ci shufps $0b00010000, %xmm0, %xmm4 17638c2ecf20Sopenharmony_ci pxor %xmm4, %xmm0 17648c2ecf20Sopenharmony_ci shufps $0b10001100, %xmm0, %xmm4 17658c2ecf20Sopenharmony_ci pxor %xmm4, %xmm0 17668c2ecf20Sopenharmony_ci pxor %xmm1, %xmm0 17678c2ecf20Sopenharmony_ci movaps %xmm0, (TKEYP) 17688c2ecf20Sopenharmony_ci add $0x10, TKEYP 17698c2ecf20Sopenharmony_ci RET 17708c2ecf20Sopenharmony_ciSYM_FUNC_END(_key_expansion_256a) 17718c2ecf20Sopenharmony_ciSYM_FUNC_END_ALIAS(_key_expansion_128) 17728c2ecf20Sopenharmony_ci 17738c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_key_expansion_192a) 17748c2ecf20Sopenharmony_ci pshufd $0b01010101, %xmm1, %xmm1 17758c2ecf20Sopenharmony_ci shufps $0b00010000, %xmm0, %xmm4 17768c2ecf20Sopenharmony_ci pxor %xmm4, %xmm0 17778c2ecf20Sopenharmony_ci shufps $0b10001100, %xmm0, %xmm4 17788c2ecf20Sopenharmony_ci pxor %xmm4, %xmm0 17798c2ecf20Sopenharmony_ci pxor %xmm1, %xmm0 17808c2ecf20Sopenharmony_ci 17818c2ecf20Sopenharmony_ci movaps %xmm2, %xmm5 17828c2ecf20Sopenharmony_ci movaps %xmm2, %xmm6 17838c2ecf20Sopenharmony_ci pslldq $4, %xmm5 17848c2ecf20Sopenharmony_ci pshufd $0b11111111, %xmm0, %xmm3 17858c2ecf20Sopenharmony_ci pxor %xmm3, %xmm2 17868c2ecf20Sopenharmony_ci pxor %xmm5, %xmm2 17878c2ecf20Sopenharmony_ci 17888c2ecf20Sopenharmony_ci movaps %xmm0, %xmm1 17898c2ecf20Sopenharmony_ci shufps $0b01000100, %xmm0, %xmm6 17908c2ecf20Sopenharmony_ci movaps %xmm6, (TKEYP) 17918c2ecf20Sopenharmony_ci shufps $0b01001110, %xmm2, %xmm1 17928c2ecf20Sopenharmony_ci movaps %xmm1, 0x10(TKEYP) 17938c2ecf20Sopenharmony_ci add $0x20, TKEYP 17948c2ecf20Sopenharmony_ci RET 17958c2ecf20Sopenharmony_ciSYM_FUNC_END(_key_expansion_192a) 17968c2ecf20Sopenharmony_ci 17978c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_key_expansion_192b) 17988c2ecf20Sopenharmony_ci pshufd $0b01010101, %xmm1, %xmm1 17998c2ecf20Sopenharmony_ci shufps $0b00010000, %xmm0, %xmm4 18008c2ecf20Sopenharmony_ci pxor %xmm4, %xmm0 18018c2ecf20Sopenharmony_ci shufps $0b10001100, %xmm0, %xmm4 18028c2ecf20Sopenharmony_ci pxor %xmm4, %xmm0 18038c2ecf20Sopenharmony_ci pxor %xmm1, %xmm0 18048c2ecf20Sopenharmony_ci 18058c2ecf20Sopenharmony_ci movaps %xmm2, %xmm5 18068c2ecf20Sopenharmony_ci pslldq $4, %xmm5 18078c2ecf20Sopenharmony_ci pshufd $0b11111111, %xmm0, %xmm3 18088c2ecf20Sopenharmony_ci pxor %xmm3, %xmm2 18098c2ecf20Sopenharmony_ci pxor %xmm5, %xmm2 18108c2ecf20Sopenharmony_ci 18118c2ecf20Sopenharmony_ci movaps %xmm0, (TKEYP) 18128c2ecf20Sopenharmony_ci add $0x10, TKEYP 18138c2ecf20Sopenharmony_ci RET 18148c2ecf20Sopenharmony_ciSYM_FUNC_END(_key_expansion_192b) 18158c2ecf20Sopenharmony_ci 18168c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_key_expansion_256b) 18178c2ecf20Sopenharmony_ci pshufd $0b10101010, %xmm1, %xmm1 18188c2ecf20Sopenharmony_ci shufps $0b00010000, %xmm2, %xmm4 18198c2ecf20Sopenharmony_ci pxor %xmm4, %xmm2 18208c2ecf20Sopenharmony_ci shufps $0b10001100, %xmm2, %xmm4 18218c2ecf20Sopenharmony_ci pxor %xmm4, %xmm2 18228c2ecf20Sopenharmony_ci pxor %xmm1, %xmm2 18238c2ecf20Sopenharmony_ci movaps %xmm2, (TKEYP) 18248c2ecf20Sopenharmony_ci add $0x10, TKEYP 18258c2ecf20Sopenharmony_ci RET 18268c2ecf20Sopenharmony_ciSYM_FUNC_END(_key_expansion_256b) 18278c2ecf20Sopenharmony_ci 18288c2ecf20Sopenharmony_ci/* 18298c2ecf20Sopenharmony_ci * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, 18308c2ecf20Sopenharmony_ci * unsigned int key_len) 18318c2ecf20Sopenharmony_ci */ 18328c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_set_key) 18338c2ecf20Sopenharmony_ci FRAME_BEGIN 18348c2ecf20Sopenharmony_ci#ifndef __x86_64__ 18358c2ecf20Sopenharmony_ci pushl KEYP 18368c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+8)(%esp), KEYP # ctx 18378c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+12)(%esp), UKEYP # in_key 18388c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), %edx # key_len 18398c2ecf20Sopenharmony_ci#endif 18408c2ecf20Sopenharmony_ci movups (UKEYP), %xmm0 # user key (first 16 bytes) 18418c2ecf20Sopenharmony_ci movaps %xmm0, (KEYP) 18428c2ecf20Sopenharmony_ci lea 0x10(KEYP), TKEYP # key addr 18438c2ecf20Sopenharmony_ci movl %edx, 480(KEYP) 18448c2ecf20Sopenharmony_ci pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x 18458c2ecf20Sopenharmony_ci cmp $24, %dl 18468c2ecf20Sopenharmony_ci jb .Lenc_key128 18478c2ecf20Sopenharmony_ci je .Lenc_key192 18488c2ecf20Sopenharmony_ci movups 0x10(UKEYP), %xmm2 # other user key 18498c2ecf20Sopenharmony_ci movaps %xmm2, (TKEYP) 18508c2ecf20Sopenharmony_ci add $0x10, TKEYP 18518c2ecf20Sopenharmony_ci aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 18528c2ecf20Sopenharmony_ci call _key_expansion_256a 18538c2ecf20Sopenharmony_ci aeskeygenassist $0x1, %xmm0, %xmm1 18548c2ecf20Sopenharmony_ci call _key_expansion_256b 18558c2ecf20Sopenharmony_ci aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 18568c2ecf20Sopenharmony_ci call _key_expansion_256a 18578c2ecf20Sopenharmony_ci aeskeygenassist $0x2, %xmm0, %xmm1 18588c2ecf20Sopenharmony_ci call _key_expansion_256b 18598c2ecf20Sopenharmony_ci aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 18608c2ecf20Sopenharmony_ci call _key_expansion_256a 18618c2ecf20Sopenharmony_ci aeskeygenassist $0x4, %xmm0, %xmm1 18628c2ecf20Sopenharmony_ci call _key_expansion_256b 18638c2ecf20Sopenharmony_ci aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 18648c2ecf20Sopenharmony_ci call _key_expansion_256a 18658c2ecf20Sopenharmony_ci aeskeygenassist $0x8, %xmm0, %xmm1 18668c2ecf20Sopenharmony_ci call _key_expansion_256b 18678c2ecf20Sopenharmony_ci aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 18688c2ecf20Sopenharmony_ci call _key_expansion_256a 18698c2ecf20Sopenharmony_ci aeskeygenassist $0x10, %xmm0, %xmm1 18708c2ecf20Sopenharmony_ci call _key_expansion_256b 18718c2ecf20Sopenharmony_ci aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 18728c2ecf20Sopenharmony_ci call _key_expansion_256a 18738c2ecf20Sopenharmony_ci aeskeygenassist $0x20, %xmm0, %xmm1 18748c2ecf20Sopenharmony_ci call _key_expansion_256b 18758c2ecf20Sopenharmony_ci aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 18768c2ecf20Sopenharmony_ci call _key_expansion_256a 18778c2ecf20Sopenharmony_ci jmp .Ldec_key 18788c2ecf20Sopenharmony_ci.Lenc_key192: 18798c2ecf20Sopenharmony_ci movq 0x10(UKEYP), %xmm2 # other user key 18808c2ecf20Sopenharmony_ci aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 18818c2ecf20Sopenharmony_ci call _key_expansion_192a 18828c2ecf20Sopenharmony_ci aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 18838c2ecf20Sopenharmony_ci call _key_expansion_192b 18848c2ecf20Sopenharmony_ci aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 18858c2ecf20Sopenharmony_ci call _key_expansion_192a 18868c2ecf20Sopenharmony_ci aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 18878c2ecf20Sopenharmony_ci call _key_expansion_192b 18888c2ecf20Sopenharmony_ci aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 18898c2ecf20Sopenharmony_ci call _key_expansion_192a 18908c2ecf20Sopenharmony_ci aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 18918c2ecf20Sopenharmony_ci call _key_expansion_192b 18928c2ecf20Sopenharmony_ci aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 18938c2ecf20Sopenharmony_ci call _key_expansion_192a 18948c2ecf20Sopenharmony_ci aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 18958c2ecf20Sopenharmony_ci call _key_expansion_192b 18968c2ecf20Sopenharmony_ci jmp .Ldec_key 18978c2ecf20Sopenharmony_ci.Lenc_key128: 18988c2ecf20Sopenharmony_ci aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 18998c2ecf20Sopenharmony_ci call _key_expansion_128 19008c2ecf20Sopenharmony_ci aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 19018c2ecf20Sopenharmony_ci call _key_expansion_128 19028c2ecf20Sopenharmony_ci aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 19038c2ecf20Sopenharmony_ci call _key_expansion_128 19048c2ecf20Sopenharmony_ci aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 19058c2ecf20Sopenharmony_ci call _key_expansion_128 19068c2ecf20Sopenharmony_ci aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 19078c2ecf20Sopenharmony_ci call _key_expansion_128 19088c2ecf20Sopenharmony_ci aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 19098c2ecf20Sopenharmony_ci call _key_expansion_128 19108c2ecf20Sopenharmony_ci aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 19118c2ecf20Sopenharmony_ci call _key_expansion_128 19128c2ecf20Sopenharmony_ci aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 19138c2ecf20Sopenharmony_ci call _key_expansion_128 19148c2ecf20Sopenharmony_ci aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 19158c2ecf20Sopenharmony_ci call _key_expansion_128 19168c2ecf20Sopenharmony_ci aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 19178c2ecf20Sopenharmony_ci call _key_expansion_128 19188c2ecf20Sopenharmony_ci.Ldec_key: 19198c2ecf20Sopenharmony_ci sub $0x10, TKEYP 19208c2ecf20Sopenharmony_ci movaps (KEYP), %xmm0 19218c2ecf20Sopenharmony_ci movaps (TKEYP), %xmm1 19228c2ecf20Sopenharmony_ci movaps %xmm0, 240(TKEYP) 19238c2ecf20Sopenharmony_ci movaps %xmm1, 240(KEYP) 19248c2ecf20Sopenharmony_ci add $0x10, KEYP 19258c2ecf20Sopenharmony_ci lea 240-16(TKEYP), UKEYP 19268c2ecf20Sopenharmony_ci.align 4 19278c2ecf20Sopenharmony_ci.Ldec_key_loop: 19288c2ecf20Sopenharmony_ci movaps (KEYP), %xmm0 19298c2ecf20Sopenharmony_ci aesimc %xmm0, %xmm1 19308c2ecf20Sopenharmony_ci movaps %xmm1, (UKEYP) 19318c2ecf20Sopenharmony_ci add $0x10, KEYP 19328c2ecf20Sopenharmony_ci sub $0x10, UKEYP 19338c2ecf20Sopenharmony_ci cmp TKEYP, KEYP 19348c2ecf20Sopenharmony_ci jb .Ldec_key_loop 19358c2ecf20Sopenharmony_ci xor AREG, AREG 19368c2ecf20Sopenharmony_ci#ifndef __x86_64__ 19378c2ecf20Sopenharmony_ci popl KEYP 19388c2ecf20Sopenharmony_ci#endif 19398c2ecf20Sopenharmony_ci FRAME_END 19408c2ecf20Sopenharmony_ci RET 19418c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_set_key) 19428c2ecf20Sopenharmony_ci 19438c2ecf20Sopenharmony_ci/* 19448c2ecf20Sopenharmony_ci * void aesni_enc(const void *ctx, u8 *dst, const u8 *src) 19458c2ecf20Sopenharmony_ci */ 19468c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_enc) 19478c2ecf20Sopenharmony_ci FRAME_BEGIN 19488c2ecf20Sopenharmony_ci#ifndef __x86_64__ 19498c2ecf20Sopenharmony_ci pushl KEYP 19508c2ecf20Sopenharmony_ci pushl KLEN 19518c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+12)(%esp), KEYP # ctx 19528c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), OUTP # dst 19538c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), INP # src 19548c2ecf20Sopenharmony_ci#endif 19558c2ecf20Sopenharmony_ci movl 480(KEYP), KLEN # key length 19568c2ecf20Sopenharmony_ci movups (INP), STATE # input 19578c2ecf20Sopenharmony_ci call _aesni_enc1 19588c2ecf20Sopenharmony_ci movups STATE, (OUTP) # output 19598c2ecf20Sopenharmony_ci#ifndef __x86_64__ 19608c2ecf20Sopenharmony_ci popl KLEN 19618c2ecf20Sopenharmony_ci popl KEYP 19628c2ecf20Sopenharmony_ci#endif 19638c2ecf20Sopenharmony_ci FRAME_END 19648c2ecf20Sopenharmony_ci RET 19658c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_enc) 19668c2ecf20Sopenharmony_ci 19678c2ecf20Sopenharmony_ci/* 19688c2ecf20Sopenharmony_ci * _aesni_enc1: internal ABI 19698c2ecf20Sopenharmony_ci * input: 19708c2ecf20Sopenharmony_ci * KEYP: key struct pointer 19718c2ecf20Sopenharmony_ci * KLEN: round count 19728c2ecf20Sopenharmony_ci * STATE: initial state (input) 19738c2ecf20Sopenharmony_ci * output: 19748c2ecf20Sopenharmony_ci * STATE: finial state (output) 19758c2ecf20Sopenharmony_ci * changed: 19768c2ecf20Sopenharmony_ci * KEY 19778c2ecf20Sopenharmony_ci * TKEYP (T1) 19788c2ecf20Sopenharmony_ci */ 19798c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_enc1) 19808c2ecf20Sopenharmony_ci movaps (KEYP), KEY # key 19818c2ecf20Sopenharmony_ci mov KEYP, TKEYP 19828c2ecf20Sopenharmony_ci pxor KEY, STATE # round 0 19838c2ecf20Sopenharmony_ci add $0x30, TKEYP 19848c2ecf20Sopenharmony_ci cmp $24, KLEN 19858c2ecf20Sopenharmony_ci jb .Lenc128 19868c2ecf20Sopenharmony_ci lea 0x20(TKEYP), TKEYP 19878c2ecf20Sopenharmony_ci je .Lenc192 19888c2ecf20Sopenharmony_ci add $0x20, TKEYP 19898c2ecf20Sopenharmony_ci movaps -0x60(TKEYP), KEY 19908c2ecf20Sopenharmony_ci aesenc KEY, STATE 19918c2ecf20Sopenharmony_ci movaps -0x50(TKEYP), KEY 19928c2ecf20Sopenharmony_ci aesenc KEY, STATE 19938c2ecf20Sopenharmony_ci.align 4 19948c2ecf20Sopenharmony_ci.Lenc192: 19958c2ecf20Sopenharmony_ci movaps -0x40(TKEYP), KEY 19968c2ecf20Sopenharmony_ci aesenc KEY, STATE 19978c2ecf20Sopenharmony_ci movaps -0x30(TKEYP), KEY 19988c2ecf20Sopenharmony_ci aesenc KEY, STATE 19998c2ecf20Sopenharmony_ci.align 4 20008c2ecf20Sopenharmony_ci.Lenc128: 20018c2ecf20Sopenharmony_ci movaps -0x20(TKEYP), KEY 20028c2ecf20Sopenharmony_ci aesenc KEY, STATE 20038c2ecf20Sopenharmony_ci movaps -0x10(TKEYP), KEY 20048c2ecf20Sopenharmony_ci aesenc KEY, STATE 20058c2ecf20Sopenharmony_ci movaps (TKEYP), KEY 20068c2ecf20Sopenharmony_ci aesenc KEY, STATE 20078c2ecf20Sopenharmony_ci movaps 0x10(TKEYP), KEY 20088c2ecf20Sopenharmony_ci aesenc KEY, STATE 20098c2ecf20Sopenharmony_ci movaps 0x20(TKEYP), KEY 20108c2ecf20Sopenharmony_ci aesenc KEY, STATE 20118c2ecf20Sopenharmony_ci movaps 0x30(TKEYP), KEY 20128c2ecf20Sopenharmony_ci aesenc KEY, STATE 20138c2ecf20Sopenharmony_ci movaps 0x40(TKEYP), KEY 20148c2ecf20Sopenharmony_ci aesenc KEY, STATE 20158c2ecf20Sopenharmony_ci movaps 0x50(TKEYP), KEY 20168c2ecf20Sopenharmony_ci aesenc KEY, STATE 20178c2ecf20Sopenharmony_ci movaps 0x60(TKEYP), KEY 20188c2ecf20Sopenharmony_ci aesenc KEY, STATE 20198c2ecf20Sopenharmony_ci movaps 0x70(TKEYP), KEY 20208c2ecf20Sopenharmony_ci aesenclast KEY, STATE 20218c2ecf20Sopenharmony_ci RET 20228c2ecf20Sopenharmony_ciSYM_FUNC_END(_aesni_enc1) 20238c2ecf20Sopenharmony_ci 20248c2ecf20Sopenharmony_ci/* 20258c2ecf20Sopenharmony_ci * _aesni_enc4: internal ABI 20268c2ecf20Sopenharmony_ci * input: 20278c2ecf20Sopenharmony_ci * KEYP: key struct pointer 20288c2ecf20Sopenharmony_ci * KLEN: round count 20298c2ecf20Sopenharmony_ci * STATE1: initial state (input) 20308c2ecf20Sopenharmony_ci * STATE2 20318c2ecf20Sopenharmony_ci * STATE3 20328c2ecf20Sopenharmony_ci * STATE4 20338c2ecf20Sopenharmony_ci * output: 20348c2ecf20Sopenharmony_ci * STATE1: finial state (output) 20358c2ecf20Sopenharmony_ci * STATE2 20368c2ecf20Sopenharmony_ci * STATE3 20378c2ecf20Sopenharmony_ci * STATE4 20388c2ecf20Sopenharmony_ci * changed: 20398c2ecf20Sopenharmony_ci * KEY 20408c2ecf20Sopenharmony_ci * TKEYP (T1) 20418c2ecf20Sopenharmony_ci */ 20428c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_enc4) 20438c2ecf20Sopenharmony_ci movaps (KEYP), KEY # key 20448c2ecf20Sopenharmony_ci mov KEYP, TKEYP 20458c2ecf20Sopenharmony_ci pxor KEY, STATE1 # round 0 20468c2ecf20Sopenharmony_ci pxor KEY, STATE2 20478c2ecf20Sopenharmony_ci pxor KEY, STATE3 20488c2ecf20Sopenharmony_ci pxor KEY, STATE4 20498c2ecf20Sopenharmony_ci add $0x30, TKEYP 20508c2ecf20Sopenharmony_ci cmp $24, KLEN 20518c2ecf20Sopenharmony_ci jb .L4enc128 20528c2ecf20Sopenharmony_ci lea 0x20(TKEYP), TKEYP 20538c2ecf20Sopenharmony_ci je .L4enc192 20548c2ecf20Sopenharmony_ci add $0x20, TKEYP 20558c2ecf20Sopenharmony_ci movaps -0x60(TKEYP), KEY 20568c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20578c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20588c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20598c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20608c2ecf20Sopenharmony_ci movaps -0x50(TKEYP), KEY 20618c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20628c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20638c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20648c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20658c2ecf20Sopenharmony_ci#.align 4 20668c2ecf20Sopenharmony_ci.L4enc192: 20678c2ecf20Sopenharmony_ci movaps -0x40(TKEYP), KEY 20688c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20698c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20708c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20718c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20728c2ecf20Sopenharmony_ci movaps -0x30(TKEYP), KEY 20738c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20748c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20758c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20768c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20778c2ecf20Sopenharmony_ci#.align 4 20788c2ecf20Sopenharmony_ci.L4enc128: 20798c2ecf20Sopenharmony_ci movaps -0x20(TKEYP), KEY 20808c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20818c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20828c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20838c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20848c2ecf20Sopenharmony_ci movaps -0x10(TKEYP), KEY 20858c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20868c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20878c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20888c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20898c2ecf20Sopenharmony_ci movaps (TKEYP), KEY 20908c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20918c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20928c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20938c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20948c2ecf20Sopenharmony_ci movaps 0x10(TKEYP), KEY 20958c2ecf20Sopenharmony_ci aesenc KEY, STATE1 20968c2ecf20Sopenharmony_ci aesenc KEY, STATE2 20978c2ecf20Sopenharmony_ci aesenc KEY, STATE3 20988c2ecf20Sopenharmony_ci aesenc KEY, STATE4 20998c2ecf20Sopenharmony_ci movaps 0x20(TKEYP), KEY 21008c2ecf20Sopenharmony_ci aesenc KEY, STATE1 21018c2ecf20Sopenharmony_ci aesenc KEY, STATE2 21028c2ecf20Sopenharmony_ci aesenc KEY, STATE3 21038c2ecf20Sopenharmony_ci aesenc KEY, STATE4 21048c2ecf20Sopenharmony_ci movaps 0x30(TKEYP), KEY 21058c2ecf20Sopenharmony_ci aesenc KEY, STATE1 21068c2ecf20Sopenharmony_ci aesenc KEY, STATE2 21078c2ecf20Sopenharmony_ci aesenc KEY, STATE3 21088c2ecf20Sopenharmony_ci aesenc KEY, STATE4 21098c2ecf20Sopenharmony_ci movaps 0x40(TKEYP), KEY 21108c2ecf20Sopenharmony_ci aesenc KEY, STATE1 21118c2ecf20Sopenharmony_ci aesenc KEY, STATE2 21128c2ecf20Sopenharmony_ci aesenc KEY, STATE3 21138c2ecf20Sopenharmony_ci aesenc KEY, STATE4 21148c2ecf20Sopenharmony_ci movaps 0x50(TKEYP), KEY 21158c2ecf20Sopenharmony_ci aesenc KEY, STATE1 21168c2ecf20Sopenharmony_ci aesenc KEY, STATE2 21178c2ecf20Sopenharmony_ci aesenc KEY, STATE3 21188c2ecf20Sopenharmony_ci aesenc KEY, STATE4 21198c2ecf20Sopenharmony_ci movaps 0x60(TKEYP), KEY 21208c2ecf20Sopenharmony_ci aesenc KEY, STATE1 21218c2ecf20Sopenharmony_ci aesenc KEY, STATE2 21228c2ecf20Sopenharmony_ci aesenc KEY, STATE3 21238c2ecf20Sopenharmony_ci aesenc KEY, STATE4 21248c2ecf20Sopenharmony_ci movaps 0x70(TKEYP), KEY 21258c2ecf20Sopenharmony_ci aesenclast KEY, STATE1 # last round 21268c2ecf20Sopenharmony_ci aesenclast KEY, STATE2 21278c2ecf20Sopenharmony_ci aesenclast KEY, STATE3 21288c2ecf20Sopenharmony_ci aesenclast KEY, STATE4 21298c2ecf20Sopenharmony_ci RET 21308c2ecf20Sopenharmony_ciSYM_FUNC_END(_aesni_enc4) 21318c2ecf20Sopenharmony_ci 21328c2ecf20Sopenharmony_ci/* 21338c2ecf20Sopenharmony_ci * void aesni_dec (const void *ctx, u8 *dst, const u8 *src) 21348c2ecf20Sopenharmony_ci */ 21358c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_dec) 21368c2ecf20Sopenharmony_ci FRAME_BEGIN 21378c2ecf20Sopenharmony_ci#ifndef __x86_64__ 21388c2ecf20Sopenharmony_ci pushl KEYP 21398c2ecf20Sopenharmony_ci pushl KLEN 21408c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+12)(%esp), KEYP # ctx 21418c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), OUTP # dst 21428c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), INP # src 21438c2ecf20Sopenharmony_ci#endif 21448c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN # key length 21458c2ecf20Sopenharmony_ci add $240, KEYP 21468c2ecf20Sopenharmony_ci movups (INP), STATE # input 21478c2ecf20Sopenharmony_ci call _aesni_dec1 21488c2ecf20Sopenharmony_ci movups STATE, (OUTP) #output 21498c2ecf20Sopenharmony_ci#ifndef __x86_64__ 21508c2ecf20Sopenharmony_ci popl KLEN 21518c2ecf20Sopenharmony_ci popl KEYP 21528c2ecf20Sopenharmony_ci#endif 21538c2ecf20Sopenharmony_ci FRAME_END 21548c2ecf20Sopenharmony_ci RET 21558c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_dec) 21568c2ecf20Sopenharmony_ci 21578c2ecf20Sopenharmony_ci/* 21588c2ecf20Sopenharmony_ci * _aesni_dec1: internal ABI 21598c2ecf20Sopenharmony_ci * input: 21608c2ecf20Sopenharmony_ci * KEYP: key struct pointer 21618c2ecf20Sopenharmony_ci * KLEN: key length 21628c2ecf20Sopenharmony_ci * STATE: initial state (input) 21638c2ecf20Sopenharmony_ci * output: 21648c2ecf20Sopenharmony_ci * STATE: finial state (output) 21658c2ecf20Sopenharmony_ci * changed: 21668c2ecf20Sopenharmony_ci * KEY 21678c2ecf20Sopenharmony_ci * TKEYP (T1) 21688c2ecf20Sopenharmony_ci */ 21698c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_dec1) 21708c2ecf20Sopenharmony_ci movaps (KEYP), KEY # key 21718c2ecf20Sopenharmony_ci mov KEYP, TKEYP 21728c2ecf20Sopenharmony_ci pxor KEY, STATE # round 0 21738c2ecf20Sopenharmony_ci add $0x30, TKEYP 21748c2ecf20Sopenharmony_ci cmp $24, KLEN 21758c2ecf20Sopenharmony_ci jb .Ldec128 21768c2ecf20Sopenharmony_ci lea 0x20(TKEYP), TKEYP 21778c2ecf20Sopenharmony_ci je .Ldec192 21788c2ecf20Sopenharmony_ci add $0x20, TKEYP 21798c2ecf20Sopenharmony_ci movaps -0x60(TKEYP), KEY 21808c2ecf20Sopenharmony_ci aesdec KEY, STATE 21818c2ecf20Sopenharmony_ci movaps -0x50(TKEYP), KEY 21828c2ecf20Sopenharmony_ci aesdec KEY, STATE 21838c2ecf20Sopenharmony_ci.align 4 21848c2ecf20Sopenharmony_ci.Ldec192: 21858c2ecf20Sopenharmony_ci movaps -0x40(TKEYP), KEY 21868c2ecf20Sopenharmony_ci aesdec KEY, STATE 21878c2ecf20Sopenharmony_ci movaps -0x30(TKEYP), KEY 21888c2ecf20Sopenharmony_ci aesdec KEY, STATE 21898c2ecf20Sopenharmony_ci.align 4 21908c2ecf20Sopenharmony_ci.Ldec128: 21918c2ecf20Sopenharmony_ci movaps -0x20(TKEYP), KEY 21928c2ecf20Sopenharmony_ci aesdec KEY, STATE 21938c2ecf20Sopenharmony_ci movaps -0x10(TKEYP), KEY 21948c2ecf20Sopenharmony_ci aesdec KEY, STATE 21958c2ecf20Sopenharmony_ci movaps (TKEYP), KEY 21968c2ecf20Sopenharmony_ci aesdec KEY, STATE 21978c2ecf20Sopenharmony_ci movaps 0x10(TKEYP), KEY 21988c2ecf20Sopenharmony_ci aesdec KEY, STATE 21998c2ecf20Sopenharmony_ci movaps 0x20(TKEYP), KEY 22008c2ecf20Sopenharmony_ci aesdec KEY, STATE 22018c2ecf20Sopenharmony_ci movaps 0x30(TKEYP), KEY 22028c2ecf20Sopenharmony_ci aesdec KEY, STATE 22038c2ecf20Sopenharmony_ci movaps 0x40(TKEYP), KEY 22048c2ecf20Sopenharmony_ci aesdec KEY, STATE 22058c2ecf20Sopenharmony_ci movaps 0x50(TKEYP), KEY 22068c2ecf20Sopenharmony_ci aesdec KEY, STATE 22078c2ecf20Sopenharmony_ci movaps 0x60(TKEYP), KEY 22088c2ecf20Sopenharmony_ci aesdec KEY, STATE 22098c2ecf20Sopenharmony_ci movaps 0x70(TKEYP), KEY 22108c2ecf20Sopenharmony_ci aesdeclast KEY, STATE 22118c2ecf20Sopenharmony_ci RET 22128c2ecf20Sopenharmony_ciSYM_FUNC_END(_aesni_dec1) 22138c2ecf20Sopenharmony_ci 22148c2ecf20Sopenharmony_ci/* 22158c2ecf20Sopenharmony_ci * _aesni_dec4: internal ABI 22168c2ecf20Sopenharmony_ci * input: 22178c2ecf20Sopenharmony_ci * KEYP: key struct pointer 22188c2ecf20Sopenharmony_ci * KLEN: key length 22198c2ecf20Sopenharmony_ci * STATE1: initial state (input) 22208c2ecf20Sopenharmony_ci * STATE2 22218c2ecf20Sopenharmony_ci * STATE3 22228c2ecf20Sopenharmony_ci * STATE4 22238c2ecf20Sopenharmony_ci * output: 22248c2ecf20Sopenharmony_ci * STATE1: finial state (output) 22258c2ecf20Sopenharmony_ci * STATE2 22268c2ecf20Sopenharmony_ci * STATE3 22278c2ecf20Sopenharmony_ci * STATE4 22288c2ecf20Sopenharmony_ci * changed: 22298c2ecf20Sopenharmony_ci * KEY 22308c2ecf20Sopenharmony_ci * TKEYP (T1) 22318c2ecf20Sopenharmony_ci */ 22328c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_dec4) 22338c2ecf20Sopenharmony_ci movaps (KEYP), KEY # key 22348c2ecf20Sopenharmony_ci mov KEYP, TKEYP 22358c2ecf20Sopenharmony_ci pxor KEY, STATE1 # round 0 22368c2ecf20Sopenharmony_ci pxor KEY, STATE2 22378c2ecf20Sopenharmony_ci pxor KEY, STATE3 22388c2ecf20Sopenharmony_ci pxor KEY, STATE4 22398c2ecf20Sopenharmony_ci add $0x30, TKEYP 22408c2ecf20Sopenharmony_ci cmp $24, KLEN 22418c2ecf20Sopenharmony_ci jb .L4dec128 22428c2ecf20Sopenharmony_ci lea 0x20(TKEYP), TKEYP 22438c2ecf20Sopenharmony_ci je .L4dec192 22448c2ecf20Sopenharmony_ci add $0x20, TKEYP 22458c2ecf20Sopenharmony_ci movaps -0x60(TKEYP), KEY 22468c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22478c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22488c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22498c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22508c2ecf20Sopenharmony_ci movaps -0x50(TKEYP), KEY 22518c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22528c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22538c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22548c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22558c2ecf20Sopenharmony_ci.align 4 22568c2ecf20Sopenharmony_ci.L4dec192: 22578c2ecf20Sopenharmony_ci movaps -0x40(TKEYP), KEY 22588c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22598c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22608c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22618c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22628c2ecf20Sopenharmony_ci movaps -0x30(TKEYP), KEY 22638c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22648c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22658c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22668c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22678c2ecf20Sopenharmony_ci.align 4 22688c2ecf20Sopenharmony_ci.L4dec128: 22698c2ecf20Sopenharmony_ci movaps -0x20(TKEYP), KEY 22708c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22718c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22728c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22738c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22748c2ecf20Sopenharmony_ci movaps -0x10(TKEYP), KEY 22758c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22768c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22778c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22788c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22798c2ecf20Sopenharmony_ci movaps (TKEYP), KEY 22808c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22818c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22828c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22838c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22848c2ecf20Sopenharmony_ci movaps 0x10(TKEYP), KEY 22858c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22868c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22878c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22888c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22898c2ecf20Sopenharmony_ci movaps 0x20(TKEYP), KEY 22908c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22918c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22928c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22938c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22948c2ecf20Sopenharmony_ci movaps 0x30(TKEYP), KEY 22958c2ecf20Sopenharmony_ci aesdec KEY, STATE1 22968c2ecf20Sopenharmony_ci aesdec KEY, STATE2 22978c2ecf20Sopenharmony_ci aesdec KEY, STATE3 22988c2ecf20Sopenharmony_ci aesdec KEY, STATE4 22998c2ecf20Sopenharmony_ci movaps 0x40(TKEYP), KEY 23008c2ecf20Sopenharmony_ci aesdec KEY, STATE1 23018c2ecf20Sopenharmony_ci aesdec KEY, STATE2 23028c2ecf20Sopenharmony_ci aesdec KEY, STATE3 23038c2ecf20Sopenharmony_ci aesdec KEY, STATE4 23048c2ecf20Sopenharmony_ci movaps 0x50(TKEYP), KEY 23058c2ecf20Sopenharmony_ci aesdec KEY, STATE1 23068c2ecf20Sopenharmony_ci aesdec KEY, STATE2 23078c2ecf20Sopenharmony_ci aesdec KEY, STATE3 23088c2ecf20Sopenharmony_ci aesdec KEY, STATE4 23098c2ecf20Sopenharmony_ci movaps 0x60(TKEYP), KEY 23108c2ecf20Sopenharmony_ci aesdec KEY, STATE1 23118c2ecf20Sopenharmony_ci aesdec KEY, STATE2 23128c2ecf20Sopenharmony_ci aesdec KEY, STATE3 23138c2ecf20Sopenharmony_ci aesdec KEY, STATE4 23148c2ecf20Sopenharmony_ci movaps 0x70(TKEYP), KEY 23158c2ecf20Sopenharmony_ci aesdeclast KEY, STATE1 # last round 23168c2ecf20Sopenharmony_ci aesdeclast KEY, STATE2 23178c2ecf20Sopenharmony_ci aesdeclast KEY, STATE3 23188c2ecf20Sopenharmony_ci aesdeclast KEY, STATE4 23198c2ecf20Sopenharmony_ci RET 23208c2ecf20Sopenharmony_ciSYM_FUNC_END(_aesni_dec4) 23218c2ecf20Sopenharmony_ci 23228c2ecf20Sopenharmony_ci/* 23238c2ecf20Sopenharmony_ci * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 23248c2ecf20Sopenharmony_ci * size_t len) 23258c2ecf20Sopenharmony_ci */ 23268c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_ecb_enc) 23278c2ecf20Sopenharmony_ci FRAME_BEGIN 23288c2ecf20Sopenharmony_ci#ifndef __x86_64__ 23298c2ecf20Sopenharmony_ci pushl LEN 23308c2ecf20Sopenharmony_ci pushl KEYP 23318c2ecf20Sopenharmony_ci pushl KLEN 23328c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), KEYP # ctx 23338c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), OUTP # dst 23348c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+24)(%esp), INP # src 23358c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+28)(%esp), LEN # len 23368c2ecf20Sopenharmony_ci#endif 23378c2ecf20Sopenharmony_ci test LEN, LEN # check length 23388c2ecf20Sopenharmony_ci jz .Lecb_enc_ret 23398c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN 23408c2ecf20Sopenharmony_ci cmp $16, LEN 23418c2ecf20Sopenharmony_ci jb .Lecb_enc_ret 23428c2ecf20Sopenharmony_ci cmp $64, LEN 23438c2ecf20Sopenharmony_ci jb .Lecb_enc_loop1 23448c2ecf20Sopenharmony_ci.align 4 23458c2ecf20Sopenharmony_ci.Lecb_enc_loop4: 23468c2ecf20Sopenharmony_ci movups (INP), STATE1 23478c2ecf20Sopenharmony_ci movups 0x10(INP), STATE2 23488c2ecf20Sopenharmony_ci movups 0x20(INP), STATE3 23498c2ecf20Sopenharmony_ci movups 0x30(INP), STATE4 23508c2ecf20Sopenharmony_ci call _aesni_enc4 23518c2ecf20Sopenharmony_ci movups STATE1, (OUTP) 23528c2ecf20Sopenharmony_ci movups STATE2, 0x10(OUTP) 23538c2ecf20Sopenharmony_ci movups STATE3, 0x20(OUTP) 23548c2ecf20Sopenharmony_ci movups STATE4, 0x30(OUTP) 23558c2ecf20Sopenharmony_ci sub $64, LEN 23568c2ecf20Sopenharmony_ci add $64, INP 23578c2ecf20Sopenharmony_ci add $64, OUTP 23588c2ecf20Sopenharmony_ci cmp $64, LEN 23598c2ecf20Sopenharmony_ci jge .Lecb_enc_loop4 23608c2ecf20Sopenharmony_ci cmp $16, LEN 23618c2ecf20Sopenharmony_ci jb .Lecb_enc_ret 23628c2ecf20Sopenharmony_ci.align 4 23638c2ecf20Sopenharmony_ci.Lecb_enc_loop1: 23648c2ecf20Sopenharmony_ci movups (INP), STATE1 23658c2ecf20Sopenharmony_ci call _aesni_enc1 23668c2ecf20Sopenharmony_ci movups STATE1, (OUTP) 23678c2ecf20Sopenharmony_ci sub $16, LEN 23688c2ecf20Sopenharmony_ci add $16, INP 23698c2ecf20Sopenharmony_ci add $16, OUTP 23708c2ecf20Sopenharmony_ci cmp $16, LEN 23718c2ecf20Sopenharmony_ci jge .Lecb_enc_loop1 23728c2ecf20Sopenharmony_ci.Lecb_enc_ret: 23738c2ecf20Sopenharmony_ci#ifndef __x86_64__ 23748c2ecf20Sopenharmony_ci popl KLEN 23758c2ecf20Sopenharmony_ci popl KEYP 23768c2ecf20Sopenharmony_ci popl LEN 23778c2ecf20Sopenharmony_ci#endif 23788c2ecf20Sopenharmony_ci FRAME_END 23798c2ecf20Sopenharmony_ci RET 23808c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_ecb_enc) 23818c2ecf20Sopenharmony_ci 23828c2ecf20Sopenharmony_ci/* 23838c2ecf20Sopenharmony_ci * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 23848c2ecf20Sopenharmony_ci * size_t len); 23858c2ecf20Sopenharmony_ci */ 23868c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_ecb_dec) 23878c2ecf20Sopenharmony_ci FRAME_BEGIN 23888c2ecf20Sopenharmony_ci#ifndef __x86_64__ 23898c2ecf20Sopenharmony_ci pushl LEN 23908c2ecf20Sopenharmony_ci pushl KEYP 23918c2ecf20Sopenharmony_ci pushl KLEN 23928c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), KEYP # ctx 23938c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), OUTP # dst 23948c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+24)(%esp), INP # src 23958c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+28)(%esp), LEN # len 23968c2ecf20Sopenharmony_ci#endif 23978c2ecf20Sopenharmony_ci test LEN, LEN 23988c2ecf20Sopenharmony_ci jz .Lecb_dec_ret 23998c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN 24008c2ecf20Sopenharmony_ci add $240, KEYP 24018c2ecf20Sopenharmony_ci cmp $16, LEN 24028c2ecf20Sopenharmony_ci jb .Lecb_dec_ret 24038c2ecf20Sopenharmony_ci cmp $64, LEN 24048c2ecf20Sopenharmony_ci jb .Lecb_dec_loop1 24058c2ecf20Sopenharmony_ci.align 4 24068c2ecf20Sopenharmony_ci.Lecb_dec_loop4: 24078c2ecf20Sopenharmony_ci movups (INP), STATE1 24088c2ecf20Sopenharmony_ci movups 0x10(INP), STATE2 24098c2ecf20Sopenharmony_ci movups 0x20(INP), STATE3 24108c2ecf20Sopenharmony_ci movups 0x30(INP), STATE4 24118c2ecf20Sopenharmony_ci call _aesni_dec4 24128c2ecf20Sopenharmony_ci movups STATE1, (OUTP) 24138c2ecf20Sopenharmony_ci movups STATE2, 0x10(OUTP) 24148c2ecf20Sopenharmony_ci movups STATE3, 0x20(OUTP) 24158c2ecf20Sopenharmony_ci movups STATE4, 0x30(OUTP) 24168c2ecf20Sopenharmony_ci sub $64, LEN 24178c2ecf20Sopenharmony_ci add $64, INP 24188c2ecf20Sopenharmony_ci add $64, OUTP 24198c2ecf20Sopenharmony_ci cmp $64, LEN 24208c2ecf20Sopenharmony_ci jge .Lecb_dec_loop4 24218c2ecf20Sopenharmony_ci cmp $16, LEN 24228c2ecf20Sopenharmony_ci jb .Lecb_dec_ret 24238c2ecf20Sopenharmony_ci.align 4 24248c2ecf20Sopenharmony_ci.Lecb_dec_loop1: 24258c2ecf20Sopenharmony_ci movups (INP), STATE1 24268c2ecf20Sopenharmony_ci call _aesni_dec1 24278c2ecf20Sopenharmony_ci movups STATE1, (OUTP) 24288c2ecf20Sopenharmony_ci sub $16, LEN 24298c2ecf20Sopenharmony_ci add $16, INP 24308c2ecf20Sopenharmony_ci add $16, OUTP 24318c2ecf20Sopenharmony_ci cmp $16, LEN 24328c2ecf20Sopenharmony_ci jge .Lecb_dec_loop1 24338c2ecf20Sopenharmony_ci.Lecb_dec_ret: 24348c2ecf20Sopenharmony_ci#ifndef __x86_64__ 24358c2ecf20Sopenharmony_ci popl KLEN 24368c2ecf20Sopenharmony_ci popl KEYP 24378c2ecf20Sopenharmony_ci popl LEN 24388c2ecf20Sopenharmony_ci#endif 24398c2ecf20Sopenharmony_ci FRAME_END 24408c2ecf20Sopenharmony_ci RET 24418c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_ecb_dec) 24428c2ecf20Sopenharmony_ci 24438c2ecf20Sopenharmony_ci/* 24448c2ecf20Sopenharmony_ci * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 24458c2ecf20Sopenharmony_ci * size_t len, u8 *iv) 24468c2ecf20Sopenharmony_ci */ 24478c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_cbc_enc) 24488c2ecf20Sopenharmony_ci FRAME_BEGIN 24498c2ecf20Sopenharmony_ci#ifndef __x86_64__ 24508c2ecf20Sopenharmony_ci pushl IVP 24518c2ecf20Sopenharmony_ci pushl LEN 24528c2ecf20Sopenharmony_ci pushl KEYP 24538c2ecf20Sopenharmony_ci pushl KLEN 24548c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), KEYP # ctx 24558c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+24)(%esp), OUTP # dst 24568c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+28)(%esp), INP # src 24578c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+32)(%esp), LEN # len 24588c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+36)(%esp), IVP # iv 24598c2ecf20Sopenharmony_ci#endif 24608c2ecf20Sopenharmony_ci cmp $16, LEN 24618c2ecf20Sopenharmony_ci jb .Lcbc_enc_ret 24628c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN 24638c2ecf20Sopenharmony_ci movups (IVP), STATE # load iv as initial state 24648c2ecf20Sopenharmony_ci.align 4 24658c2ecf20Sopenharmony_ci.Lcbc_enc_loop: 24668c2ecf20Sopenharmony_ci movups (INP), IN # load input 24678c2ecf20Sopenharmony_ci pxor IN, STATE 24688c2ecf20Sopenharmony_ci call _aesni_enc1 24698c2ecf20Sopenharmony_ci movups STATE, (OUTP) # store output 24708c2ecf20Sopenharmony_ci sub $16, LEN 24718c2ecf20Sopenharmony_ci add $16, INP 24728c2ecf20Sopenharmony_ci add $16, OUTP 24738c2ecf20Sopenharmony_ci cmp $16, LEN 24748c2ecf20Sopenharmony_ci jge .Lcbc_enc_loop 24758c2ecf20Sopenharmony_ci movups STATE, (IVP) 24768c2ecf20Sopenharmony_ci.Lcbc_enc_ret: 24778c2ecf20Sopenharmony_ci#ifndef __x86_64__ 24788c2ecf20Sopenharmony_ci popl KLEN 24798c2ecf20Sopenharmony_ci popl KEYP 24808c2ecf20Sopenharmony_ci popl LEN 24818c2ecf20Sopenharmony_ci popl IVP 24828c2ecf20Sopenharmony_ci#endif 24838c2ecf20Sopenharmony_ci FRAME_END 24848c2ecf20Sopenharmony_ci RET 24858c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_cbc_enc) 24868c2ecf20Sopenharmony_ci 24878c2ecf20Sopenharmony_ci/* 24888c2ecf20Sopenharmony_ci * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 24898c2ecf20Sopenharmony_ci * size_t len, u8 *iv) 24908c2ecf20Sopenharmony_ci */ 24918c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_cbc_dec) 24928c2ecf20Sopenharmony_ci FRAME_BEGIN 24938c2ecf20Sopenharmony_ci#ifndef __x86_64__ 24948c2ecf20Sopenharmony_ci pushl IVP 24958c2ecf20Sopenharmony_ci pushl LEN 24968c2ecf20Sopenharmony_ci pushl KEYP 24978c2ecf20Sopenharmony_ci pushl KLEN 24988c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), KEYP # ctx 24998c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+24)(%esp), OUTP # dst 25008c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+28)(%esp), INP # src 25018c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+32)(%esp), LEN # len 25028c2ecf20Sopenharmony_ci movl (FRAME_OFFSET+36)(%esp), IVP # iv 25038c2ecf20Sopenharmony_ci#endif 25048c2ecf20Sopenharmony_ci cmp $16, LEN 25058c2ecf20Sopenharmony_ci jb .Lcbc_dec_just_ret 25068c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN 25078c2ecf20Sopenharmony_ci add $240, KEYP 25088c2ecf20Sopenharmony_ci movups (IVP), IV 25098c2ecf20Sopenharmony_ci cmp $64, LEN 25108c2ecf20Sopenharmony_ci jb .Lcbc_dec_loop1 25118c2ecf20Sopenharmony_ci.align 4 25128c2ecf20Sopenharmony_ci.Lcbc_dec_loop4: 25138c2ecf20Sopenharmony_ci movups (INP), IN1 25148c2ecf20Sopenharmony_ci movaps IN1, STATE1 25158c2ecf20Sopenharmony_ci movups 0x10(INP), IN2 25168c2ecf20Sopenharmony_ci movaps IN2, STATE2 25178c2ecf20Sopenharmony_ci#ifdef __x86_64__ 25188c2ecf20Sopenharmony_ci movups 0x20(INP), IN3 25198c2ecf20Sopenharmony_ci movaps IN3, STATE3 25208c2ecf20Sopenharmony_ci movups 0x30(INP), IN4 25218c2ecf20Sopenharmony_ci movaps IN4, STATE4 25228c2ecf20Sopenharmony_ci#else 25238c2ecf20Sopenharmony_ci movups 0x20(INP), IN1 25248c2ecf20Sopenharmony_ci movaps IN1, STATE3 25258c2ecf20Sopenharmony_ci movups 0x30(INP), IN2 25268c2ecf20Sopenharmony_ci movaps IN2, STATE4 25278c2ecf20Sopenharmony_ci#endif 25288c2ecf20Sopenharmony_ci call _aesni_dec4 25298c2ecf20Sopenharmony_ci pxor IV, STATE1 25308c2ecf20Sopenharmony_ci#ifdef __x86_64__ 25318c2ecf20Sopenharmony_ci pxor IN1, STATE2 25328c2ecf20Sopenharmony_ci pxor IN2, STATE3 25338c2ecf20Sopenharmony_ci pxor IN3, STATE4 25348c2ecf20Sopenharmony_ci movaps IN4, IV 25358c2ecf20Sopenharmony_ci#else 25368c2ecf20Sopenharmony_ci pxor IN1, STATE4 25378c2ecf20Sopenharmony_ci movaps IN2, IV 25388c2ecf20Sopenharmony_ci movups (INP), IN1 25398c2ecf20Sopenharmony_ci pxor IN1, STATE2 25408c2ecf20Sopenharmony_ci movups 0x10(INP), IN2 25418c2ecf20Sopenharmony_ci pxor IN2, STATE3 25428c2ecf20Sopenharmony_ci#endif 25438c2ecf20Sopenharmony_ci movups STATE1, (OUTP) 25448c2ecf20Sopenharmony_ci movups STATE2, 0x10(OUTP) 25458c2ecf20Sopenharmony_ci movups STATE3, 0x20(OUTP) 25468c2ecf20Sopenharmony_ci movups STATE4, 0x30(OUTP) 25478c2ecf20Sopenharmony_ci sub $64, LEN 25488c2ecf20Sopenharmony_ci add $64, INP 25498c2ecf20Sopenharmony_ci add $64, OUTP 25508c2ecf20Sopenharmony_ci cmp $64, LEN 25518c2ecf20Sopenharmony_ci jge .Lcbc_dec_loop4 25528c2ecf20Sopenharmony_ci cmp $16, LEN 25538c2ecf20Sopenharmony_ci jb .Lcbc_dec_ret 25548c2ecf20Sopenharmony_ci.align 4 25558c2ecf20Sopenharmony_ci.Lcbc_dec_loop1: 25568c2ecf20Sopenharmony_ci movups (INP), IN 25578c2ecf20Sopenharmony_ci movaps IN, STATE 25588c2ecf20Sopenharmony_ci call _aesni_dec1 25598c2ecf20Sopenharmony_ci pxor IV, STATE 25608c2ecf20Sopenharmony_ci movups STATE, (OUTP) 25618c2ecf20Sopenharmony_ci movaps IN, IV 25628c2ecf20Sopenharmony_ci sub $16, LEN 25638c2ecf20Sopenharmony_ci add $16, INP 25648c2ecf20Sopenharmony_ci add $16, OUTP 25658c2ecf20Sopenharmony_ci cmp $16, LEN 25668c2ecf20Sopenharmony_ci jge .Lcbc_dec_loop1 25678c2ecf20Sopenharmony_ci.Lcbc_dec_ret: 25688c2ecf20Sopenharmony_ci movups IV, (IVP) 25698c2ecf20Sopenharmony_ci.Lcbc_dec_just_ret: 25708c2ecf20Sopenharmony_ci#ifndef __x86_64__ 25718c2ecf20Sopenharmony_ci popl KLEN 25728c2ecf20Sopenharmony_ci popl KEYP 25738c2ecf20Sopenharmony_ci popl LEN 25748c2ecf20Sopenharmony_ci popl IVP 25758c2ecf20Sopenharmony_ci#endif 25768c2ecf20Sopenharmony_ci FRAME_END 25778c2ecf20Sopenharmony_ci RET 25788c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_cbc_dec) 25798c2ecf20Sopenharmony_ci 25808c2ecf20Sopenharmony_ci#ifdef __x86_64__ 25818c2ecf20Sopenharmony_ci.pushsection .rodata 25828c2ecf20Sopenharmony_ci.align 16 25838c2ecf20Sopenharmony_ci.Lbswap_mask: 25848c2ecf20Sopenharmony_ci .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 25858c2ecf20Sopenharmony_ci.popsection 25868c2ecf20Sopenharmony_ci 25878c2ecf20Sopenharmony_ci/* 25888c2ecf20Sopenharmony_ci * _aesni_inc_init: internal ABI 25898c2ecf20Sopenharmony_ci * setup registers used by _aesni_inc 25908c2ecf20Sopenharmony_ci * input: 25918c2ecf20Sopenharmony_ci * IV 25928c2ecf20Sopenharmony_ci * output: 25938c2ecf20Sopenharmony_ci * CTR: == IV, in little endian 25948c2ecf20Sopenharmony_ci * TCTR_LOW: == lower qword of CTR 25958c2ecf20Sopenharmony_ci * INC: == 1, in little endian 25968c2ecf20Sopenharmony_ci * BSWAP_MASK == endian swapping mask 25978c2ecf20Sopenharmony_ci */ 25988c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_inc_init) 25998c2ecf20Sopenharmony_ci movaps .Lbswap_mask, BSWAP_MASK 26008c2ecf20Sopenharmony_ci movaps IV, CTR 26018c2ecf20Sopenharmony_ci pshufb BSWAP_MASK, CTR 26028c2ecf20Sopenharmony_ci mov $1, TCTR_LOW 26038c2ecf20Sopenharmony_ci movq TCTR_LOW, INC 26048c2ecf20Sopenharmony_ci movq CTR, TCTR_LOW 26058c2ecf20Sopenharmony_ci RET 26068c2ecf20Sopenharmony_ciSYM_FUNC_END(_aesni_inc_init) 26078c2ecf20Sopenharmony_ci 26088c2ecf20Sopenharmony_ci/* 26098c2ecf20Sopenharmony_ci * _aesni_inc: internal ABI 26108c2ecf20Sopenharmony_ci * Increase IV by 1, IV is in big endian 26118c2ecf20Sopenharmony_ci * input: 26128c2ecf20Sopenharmony_ci * IV 26138c2ecf20Sopenharmony_ci * CTR: == IV, in little endian 26148c2ecf20Sopenharmony_ci * TCTR_LOW: == lower qword of CTR 26158c2ecf20Sopenharmony_ci * INC: == 1, in little endian 26168c2ecf20Sopenharmony_ci * BSWAP_MASK == endian swapping mask 26178c2ecf20Sopenharmony_ci * output: 26188c2ecf20Sopenharmony_ci * IV: Increase by 1 26198c2ecf20Sopenharmony_ci * changed: 26208c2ecf20Sopenharmony_ci * CTR: == output IV, in little endian 26218c2ecf20Sopenharmony_ci * TCTR_LOW: == lower qword of CTR 26228c2ecf20Sopenharmony_ci */ 26238c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_inc) 26248c2ecf20Sopenharmony_ci paddq INC, CTR 26258c2ecf20Sopenharmony_ci add $1, TCTR_LOW 26268c2ecf20Sopenharmony_ci jnc .Linc_low 26278c2ecf20Sopenharmony_ci pslldq $8, INC 26288c2ecf20Sopenharmony_ci paddq INC, CTR 26298c2ecf20Sopenharmony_ci psrldq $8, INC 26308c2ecf20Sopenharmony_ci.Linc_low: 26318c2ecf20Sopenharmony_ci movaps CTR, IV 26328c2ecf20Sopenharmony_ci pshufb BSWAP_MASK, IV 26338c2ecf20Sopenharmony_ci RET 26348c2ecf20Sopenharmony_ciSYM_FUNC_END(_aesni_inc) 26358c2ecf20Sopenharmony_ci 26368c2ecf20Sopenharmony_ci/* 26378c2ecf20Sopenharmony_ci * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 26388c2ecf20Sopenharmony_ci * size_t len, u8 *iv) 26398c2ecf20Sopenharmony_ci */ 26408c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_ctr_enc) 26418c2ecf20Sopenharmony_ci FRAME_BEGIN 26428c2ecf20Sopenharmony_ci cmp $16, LEN 26438c2ecf20Sopenharmony_ci jb .Lctr_enc_just_ret 26448c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN 26458c2ecf20Sopenharmony_ci movups (IVP), IV 26468c2ecf20Sopenharmony_ci call _aesni_inc_init 26478c2ecf20Sopenharmony_ci cmp $64, LEN 26488c2ecf20Sopenharmony_ci jb .Lctr_enc_loop1 26498c2ecf20Sopenharmony_ci.align 4 26508c2ecf20Sopenharmony_ci.Lctr_enc_loop4: 26518c2ecf20Sopenharmony_ci movaps IV, STATE1 26528c2ecf20Sopenharmony_ci call _aesni_inc 26538c2ecf20Sopenharmony_ci movups (INP), IN1 26548c2ecf20Sopenharmony_ci movaps IV, STATE2 26558c2ecf20Sopenharmony_ci call _aesni_inc 26568c2ecf20Sopenharmony_ci movups 0x10(INP), IN2 26578c2ecf20Sopenharmony_ci movaps IV, STATE3 26588c2ecf20Sopenharmony_ci call _aesni_inc 26598c2ecf20Sopenharmony_ci movups 0x20(INP), IN3 26608c2ecf20Sopenharmony_ci movaps IV, STATE4 26618c2ecf20Sopenharmony_ci call _aesni_inc 26628c2ecf20Sopenharmony_ci movups 0x30(INP), IN4 26638c2ecf20Sopenharmony_ci call _aesni_enc4 26648c2ecf20Sopenharmony_ci pxor IN1, STATE1 26658c2ecf20Sopenharmony_ci movups STATE1, (OUTP) 26668c2ecf20Sopenharmony_ci pxor IN2, STATE2 26678c2ecf20Sopenharmony_ci movups STATE2, 0x10(OUTP) 26688c2ecf20Sopenharmony_ci pxor IN3, STATE3 26698c2ecf20Sopenharmony_ci movups STATE3, 0x20(OUTP) 26708c2ecf20Sopenharmony_ci pxor IN4, STATE4 26718c2ecf20Sopenharmony_ci movups STATE4, 0x30(OUTP) 26728c2ecf20Sopenharmony_ci sub $64, LEN 26738c2ecf20Sopenharmony_ci add $64, INP 26748c2ecf20Sopenharmony_ci add $64, OUTP 26758c2ecf20Sopenharmony_ci cmp $64, LEN 26768c2ecf20Sopenharmony_ci jge .Lctr_enc_loop4 26778c2ecf20Sopenharmony_ci cmp $16, LEN 26788c2ecf20Sopenharmony_ci jb .Lctr_enc_ret 26798c2ecf20Sopenharmony_ci.align 4 26808c2ecf20Sopenharmony_ci.Lctr_enc_loop1: 26818c2ecf20Sopenharmony_ci movaps IV, STATE 26828c2ecf20Sopenharmony_ci call _aesni_inc 26838c2ecf20Sopenharmony_ci movups (INP), IN 26848c2ecf20Sopenharmony_ci call _aesni_enc1 26858c2ecf20Sopenharmony_ci pxor IN, STATE 26868c2ecf20Sopenharmony_ci movups STATE, (OUTP) 26878c2ecf20Sopenharmony_ci sub $16, LEN 26888c2ecf20Sopenharmony_ci add $16, INP 26898c2ecf20Sopenharmony_ci add $16, OUTP 26908c2ecf20Sopenharmony_ci cmp $16, LEN 26918c2ecf20Sopenharmony_ci jge .Lctr_enc_loop1 26928c2ecf20Sopenharmony_ci.Lctr_enc_ret: 26938c2ecf20Sopenharmony_ci movups IV, (IVP) 26948c2ecf20Sopenharmony_ci.Lctr_enc_just_ret: 26958c2ecf20Sopenharmony_ci FRAME_END 26968c2ecf20Sopenharmony_ci RET 26978c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_ctr_enc) 26988c2ecf20Sopenharmony_ci 26998c2ecf20Sopenharmony_ci/* 27008c2ecf20Sopenharmony_ci * _aesni_gf128mul_x_ble: internal ABI 27018c2ecf20Sopenharmony_ci * Multiply in GF(2^128) for XTS IVs 27028c2ecf20Sopenharmony_ci * input: 27038c2ecf20Sopenharmony_ci * IV: current IV 27048c2ecf20Sopenharmony_ci * GF128MUL_MASK == mask with 0x87 and 0x01 27058c2ecf20Sopenharmony_ci * output: 27068c2ecf20Sopenharmony_ci * IV: next IV 27078c2ecf20Sopenharmony_ci * changed: 27088c2ecf20Sopenharmony_ci * CTR: == temporary value 27098c2ecf20Sopenharmony_ci */ 27108c2ecf20Sopenharmony_ci#define _aesni_gf128mul_x_ble() \ 27118c2ecf20Sopenharmony_ci pshufd $0x13, IV, CTR; \ 27128c2ecf20Sopenharmony_ci paddq IV, IV; \ 27138c2ecf20Sopenharmony_ci psrad $31, CTR; \ 27148c2ecf20Sopenharmony_ci pand GF128MUL_MASK, CTR; \ 27158c2ecf20Sopenharmony_ci pxor CTR, IV; 27168c2ecf20Sopenharmony_ci 27178c2ecf20Sopenharmony_ci/* 27188c2ecf20Sopenharmony_ci * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, 27198c2ecf20Sopenharmony_ci * const u8 *src, unsigned int len, le128 *iv) 27208c2ecf20Sopenharmony_ci */ 27218c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_xts_encrypt) 27228c2ecf20Sopenharmony_ci FRAME_BEGIN 27238c2ecf20Sopenharmony_ci 27248c2ecf20Sopenharmony_ci movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK 27258c2ecf20Sopenharmony_ci movups (IVP), IV 27268c2ecf20Sopenharmony_ci 27278c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN 27288c2ecf20Sopenharmony_ci 27298c2ecf20Sopenharmony_ci.Lxts_enc_loop4: 27308c2ecf20Sopenharmony_ci movdqa IV, STATE1 27318c2ecf20Sopenharmony_ci movdqu 0x00(INP), INC 27328c2ecf20Sopenharmony_ci pxor INC, STATE1 27338c2ecf20Sopenharmony_ci movdqu IV, 0x00(OUTP) 27348c2ecf20Sopenharmony_ci 27358c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 27368c2ecf20Sopenharmony_ci movdqa IV, STATE2 27378c2ecf20Sopenharmony_ci movdqu 0x10(INP), INC 27388c2ecf20Sopenharmony_ci pxor INC, STATE2 27398c2ecf20Sopenharmony_ci movdqu IV, 0x10(OUTP) 27408c2ecf20Sopenharmony_ci 27418c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 27428c2ecf20Sopenharmony_ci movdqa IV, STATE3 27438c2ecf20Sopenharmony_ci movdqu 0x20(INP), INC 27448c2ecf20Sopenharmony_ci pxor INC, STATE3 27458c2ecf20Sopenharmony_ci movdqu IV, 0x20(OUTP) 27468c2ecf20Sopenharmony_ci 27478c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 27488c2ecf20Sopenharmony_ci movdqa IV, STATE4 27498c2ecf20Sopenharmony_ci movdqu 0x30(INP), INC 27508c2ecf20Sopenharmony_ci pxor INC, STATE4 27518c2ecf20Sopenharmony_ci movdqu IV, 0x30(OUTP) 27528c2ecf20Sopenharmony_ci 27538c2ecf20Sopenharmony_ci call _aesni_enc4 27548c2ecf20Sopenharmony_ci 27558c2ecf20Sopenharmony_ci movdqu 0x00(OUTP), INC 27568c2ecf20Sopenharmony_ci pxor INC, STATE1 27578c2ecf20Sopenharmony_ci movdqu STATE1, 0x00(OUTP) 27588c2ecf20Sopenharmony_ci 27598c2ecf20Sopenharmony_ci movdqu 0x10(OUTP), INC 27608c2ecf20Sopenharmony_ci pxor INC, STATE2 27618c2ecf20Sopenharmony_ci movdqu STATE2, 0x10(OUTP) 27628c2ecf20Sopenharmony_ci 27638c2ecf20Sopenharmony_ci movdqu 0x20(OUTP), INC 27648c2ecf20Sopenharmony_ci pxor INC, STATE3 27658c2ecf20Sopenharmony_ci movdqu STATE3, 0x20(OUTP) 27668c2ecf20Sopenharmony_ci 27678c2ecf20Sopenharmony_ci movdqu 0x30(OUTP), INC 27688c2ecf20Sopenharmony_ci pxor INC, STATE4 27698c2ecf20Sopenharmony_ci movdqu STATE4, 0x30(OUTP) 27708c2ecf20Sopenharmony_ci 27718c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 27728c2ecf20Sopenharmony_ci 27738c2ecf20Sopenharmony_ci add $64, INP 27748c2ecf20Sopenharmony_ci add $64, OUTP 27758c2ecf20Sopenharmony_ci sub $64, LEN 27768c2ecf20Sopenharmony_ci ja .Lxts_enc_loop4 27778c2ecf20Sopenharmony_ci 27788c2ecf20Sopenharmony_ci movups IV, (IVP) 27798c2ecf20Sopenharmony_ci 27808c2ecf20Sopenharmony_ci FRAME_END 27818c2ecf20Sopenharmony_ci RET 27828c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_xts_encrypt) 27838c2ecf20Sopenharmony_ci 27848c2ecf20Sopenharmony_ci/* 27858c2ecf20Sopenharmony_ci * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, 27868c2ecf20Sopenharmony_ci * const u8 *src, unsigned int len, le128 *iv) 27878c2ecf20Sopenharmony_ci */ 27888c2ecf20Sopenharmony_ciSYM_FUNC_START(aesni_xts_decrypt) 27898c2ecf20Sopenharmony_ci FRAME_BEGIN 27908c2ecf20Sopenharmony_ci 27918c2ecf20Sopenharmony_ci movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK 27928c2ecf20Sopenharmony_ci movups (IVP), IV 27938c2ecf20Sopenharmony_ci 27948c2ecf20Sopenharmony_ci mov 480(KEYP), KLEN 27958c2ecf20Sopenharmony_ci add $240, KEYP 27968c2ecf20Sopenharmony_ci 27978c2ecf20Sopenharmony_ci.Lxts_dec_loop4: 27988c2ecf20Sopenharmony_ci movdqa IV, STATE1 27998c2ecf20Sopenharmony_ci movdqu 0x00(INP), INC 28008c2ecf20Sopenharmony_ci pxor INC, STATE1 28018c2ecf20Sopenharmony_ci movdqu IV, 0x00(OUTP) 28028c2ecf20Sopenharmony_ci 28038c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 28048c2ecf20Sopenharmony_ci movdqa IV, STATE2 28058c2ecf20Sopenharmony_ci movdqu 0x10(INP), INC 28068c2ecf20Sopenharmony_ci pxor INC, STATE2 28078c2ecf20Sopenharmony_ci movdqu IV, 0x10(OUTP) 28088c2ecf20Sopenharmony_ci 28098c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 28108c2ecf20Sopenharmony_ci movdqa IV, STATE3 28118c2ecf20Sopenharmony_ci movdqu 0x20(INP), INC 28128c2ecf20Sopenharmony_ci pxor INC, STATE3 28138c2ecf20Sopenharmony_ci movdqu IV, 0x20(OUTP) 28148c2ecf20Sopenharmony_ci 28158c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 28168c2ecf20Sopenharmony_ci movdqa IV, STATE4 28178c2ecf20Sopenharmony_ci movdqu 0x30(INP), INC 28188c2ecf20Sopenharmony_ci pxor INC, STATE4 28198c2ecf20Sopenharmony_ci movdqu IV, 0x30(OUTP) 28208c2ecf20Sopenharmony_ci 28218c2ecf20Sopenharmony_ci call _aesni_dec4 28228c2ecf20Sopenharmony_ci 28238c2ecf20Sopenharmony_ci movdqu 0x00(OUTP), INC 28248c2ecf20Sopenharmony_ci pxor INC, STATE1 28258c2ecf20Sopenharmony_ci movdqu STATE1, 0x00(OUTP) 28268c2ecf20Sopenharmony_ci 28278c2ecf20Sopenharmony_ci movdqu 0x10(OUTP), INC 28288c2ecf20Sopenharmony_ci pxor INC, STATE2 28298c2ecf20Sopenharmony_ci movdqu STATE2, 0x10(OUTP) 28308c2ecf20Sopenharmony_ci 28318c2ecf20Sopenharmony_ci movdqu 0x20(OUTP), INC 28328c2ecf20Sopenharmony_ci pxor INC, STATE3 28338c2ecf20Sopenharmony_ci movdqu STATE3, 0x20(OUTP) 28348c2ecf20Sopenharmony_ci 28358c2ecf20Sopenharmony_ci movdqu 0x30(OUTP), INC 28368c2ecf20Sopenharmony_ci pxor INC, STATE4 28378c2ecf20Sopenharmony_ci movdqu STATE4, 0x30(OUTP) 28388c2ecf20Sopenharmony_ci 28398c2ecf20Sopenharmony_ci _aesni_gf128mul_x_ble() 28408c2ecf20Sopenharmony_ci 28418c2ecf20Sopenharmony_ci add $64, INP 28428c2ecf20Sopenharmony_ci add $64, OUTP 28438c2ecf20Sopenharmony_ci sub $64, LEN 28448c2ecf20Sopenharmony_ci ja .Lxts_dec_loop4 28458c2ecf20Sopenharmony_ci 28468c2ecf20Sopenharmony_ci movups IV, (IVP) 28478c2ecf20Sopenharmony_ci 28488c2ecf20Sopenharmony_ci FRAME_END 28498c2ecf20Sopenharmony_ci RET 28508c2ecf20Sopenharmony_ciSYM_FUNC_END(aesni_xts_decrypt) 28518c2ecf20Sopenharmony_ci 28528c2ecf20Sopenharmony_ci#endif 2853