/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
 * interface for 64-bit kernels.
 *    Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
 *             Aidan O'Mahony (aidan.o.mahony@intel.com)
 *             Adrian Hoban <adrian.hoban@intel.com>
 *             James Guilford (james.guilford@intel.com)
 *             Gabriele Paoloni <gabriele.paoloni@intel.com>
 *             Tadeusz Struk (tadeusz.struk@intel.com)
 *             Wajdi Feghali (wajdi.k.feghali@intel.com)
 *    Copyright (c) 2010, Intel Corporation.
 *
 * Ported x86_64 version to x86:
 *    Author: Mathias Krause <minipli@googlemail.com>
 */

#include <linux/linkage.h>
#include <asm/frame.h>
#include <asm/nospec-branch.h>

/*
 * The following macros are used to move an (un)aligned 16 byte value to/from
 * an XMM register.  This can be done for either FP or integer values: for FP
 * use movaps (move aligned packed single), for integer use movdqa (move double
 * quad aligned).  It doesn't make a performance difference which instruction
 * is used since Nehalem (original Core i7) was released.  However, movaps is
 * a byte shorter, so that is the one we'll use for now (same for unaligned).
 */
#define MOVADQ	movaps
#define MOVUDQ	movups

#ifdef __x86_64__

# constants in mergeable sections, linker can reorder and merge
.section	.rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY:		.octa 0xC2000000000000000000000000000001
.section	.rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE:		.octa 0x00000001000000000000000000000001

.section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:	.octa 0x000102030405060708090A0B0C0D0E0F
.section	.rodata.cst16.MASK1, "aM", @progbits, 16
.align 16
MASK1:		.octa 0x0000000000000000ffffffffffffffff
.section	.rodata.cst16.MASK2, "aM", @progbits, 16
.align 16
MASK2:		.octa 0xffffffffffffffff0000000000000000
.section	.rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE:		.octa 0x00000000000000000000000000000001
.section	.rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
.align 16
/* NOTE(review): literal has 31 hex digits; kept exactly as found. */
F_MIN_MASK:	.octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
.section	.rodata.cst16.dec, "aM", @progbits, 16
.align 16
dec:		.octa 0x1
.section	.rodata.cst16.enc, "aM", @progbits, 16
.align 16
enc:		.octa 0x2

# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK,
# and zero should follow ALL_F
# (code indexes across the three with byte offsets, e.g. ALL_F+16-r13)
.section	.rodata, "a", @progbits
.align 16
SHIFT_MASK:	.octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F:		.octa 0xffffffffffffffffffffffffffffffff
		.octa 0x00000000000000000000000000000000

.text


/* Byte offset added to %rsp when addressing the stack arguments below. */
#define	STACK_OFFSET	8*3

/*
 * Byte offsets of the context fields addressed through %arg2
 * (the macro comments refer to this structure as gcm_context_data).
 */
#define AadHash		16*0
#define AadLen		16*1
#define InLen		(16*1)+8
#define PBlockEncKey	16*2
#define OrigIV		16*3
#define CurCount	16*4
#define PBlockLen	16*5
#define	HashKey		16*6	// store HashKey <<1 mod poly here
#define	HashKey_2	16*7	// store HashKey^2 <<1 mod poly here
#define	HashKey_3	16*8	// store HashKey^3 <<1 mod poly here
#define	HashKey_4	16*9	// store HashKey^4 <<1 mod poly here
#define	HashKey_k	16*10	// store XOR of High 64 bits and Low 64
				// bits of HashKey <<1 mod poly here
				// (for Karatsuba purposes)
#define	HashKey_2_k	16*11	// store XOR of High 64 bits and Low 64
				// bits of HashKey^2 <<1 mod poly here
				// (for Karatsuba purposes)
#define	HashKey_3_k	16*12	// store XOR of High 64 bits and Low 64
				// bits of HashKey^3 <<1 mod poly here
				// (for Karatsuba purposes)
#define	HashKey_4_k	16*13	// store XOR of High 64 bits and Low 64
				// bits of HashKey^4 <<1 mod poly here
				// (for Karatsuba purposes)

/* Register arguments are bare names: they are used as %arg1 ... %arg6. */
#define arg1 rdi
#define arg2 rsi
#define arg3 rdx
#define arg4 rcx
#define arg5 r8
#define arg6 r9
/* Stack arguments are complete memory operands. */
#define arg7 STACK_OFFSET+8(%rsp)
#define arg8 STACK_OFFSET+16(%rsp)
#define arg9 STACK_OFFSET+24(%rsp)
#define arg10 STACK_OFFSET+32(%rsp)
#define arg11 STACK_OFFSET+40(%rsp)
#define keysize 2*15*16(%arg1)
#endif


#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3

#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12

#define GF128MUL_MASK %xmm7

/* General-purpose register aliases; the mapping differs per architecture. */
#ifdef __x86_64__
#define AREG	%rax
#define KEYP	%rdi
#define OUTP	%rsi
#define UKEYP	OUTP
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
#define TCTR_LOW T2
#else
#define AREG	%eax
#define KEYP	%edi
#define OUTP	AREG
#define UKEYP	OUTP
#define INP	%edx
#define LEN	%esi
#define IVP	%ebp
#define KLEN	%ebx
#define T1	%ecx
#define TKEYP	T1
#endif

# FUNC_SAVE: pushes %r12, %r13 and %r14 (3 * 8 bytes on the stack).
# Must be paired with FUNC_RESTORE, which pops them in reverse order.
.macro FUNC_SAVE
	push	%r12
	push	%r13
	push	%r14
#
# states of %xmm registers %xmm6:%xmm15 not saved
# all %xmm registers are clobbered
#
.endm


# FUNC_RESTORE: pops %r14, %r13 and %r12, undoing FUNC_SAVE.
.macro FUNC_RESTORE
	pop	%r14
	pop	%r13
	pop	%r12
.endm

# Precompute hashkeys.
# Input: Hash subkey.
# Output: HashKeys stored in gcm_context_data. Only needs to be called
#	  once per key.
# clobbers r12, and tmp xmm registers.
#
# SUBKEY     operand holding the address of the 16-byte hash subkey
# TMP1-TMP7  scratch xmm registers
# Stores HashKey, HashKey_2, HashKey_3, HashKey_4 and the matching *_k
# values (high qword XOR low qword) at their offsets from %arg2.
.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
	mov	\SUBKEY, %r12
	movdqu	(%r12), \TMP3
	movdqa	SHUF_MASK(%rip), \TMP2
	pshufb	\TMP2, \TMP3

	# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)

	movdqa	\TMP3, \TMP2
	psllq	$1, \TMP3
	psrlq	$63, \TMP2
	movdqa	\TMP2, \TMP1
	pslldq	$8, \TMP2
	psrldq	$8, \TMP1
	por	\TMP2, \TMP3

	# reduce HashKey<<1

	pshufd	$0x24, \TMP1, \TMP2
	pcmpeqd	TWOONE(%rip), \TMP2
	pand	POLY(%rip), \TMP2
	pxor	\TMP2, \TMP3
	movdqu	\TMP3, HashKey(%arg2)

	movdqa	\TMP3, \TMP5
	pshufd	$78, \TMP3, \TMP1
	pxor	\TMP3, \TMP1
	movdqu	\TMP1, HashKey_k(%arg2)

	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^2<<1 (mod poly)
	movdqu	\TMP5, HashKey_2(%arg2)
# HashKey_2 = HashKey^2<<1 (mod poly)
	pshufd	$78, \TMP5, \TMP1
	pxor	\TMP5, \TMP1
	movdqu	\TMP1, HashKey_2_k(%arg2)

	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
	movdqu	\TMP5, HashKey_3(%arg2)
	pshufd	$78, \TMP5, \TMP1
	pxor	\TMP5, \TMP1
	movdqu	\TMP1, HashKey_3_k(%arg2)

	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^4<<1 (mod poly)
	movdqu	\TMP5, HashKey_4(%arg2)
	pshufd	$78, \TMP5, \TMP1
	pxor	\TMP5, \TMP1
	movdqu	\TMP1, HashKey_4_k(%arg2)
.endm

# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
# NOTE(review): %xmm7 is also handed to PRECOMPUTE as a scratch register and
# CALC_AAD_HASH loads %xmm14, so both are overwritten as well.
#
# Iv      operand holding the address of the 16-byte initial counter block
# SUBKEY  operand holding the address of the hash subkey
# AAD     operand holding the address of the additional authenticated data
# AADLEN  operand holding the AAD length in bytes
# The context is addressed through %arg2.
.macro GCM_INIT Iv SUBKEY AAD AADLEN
	mov	\AADLEN, %r11
	mov	%r11, AadLen(%arg2)	   # ctx_data.aad_length = aad_length
	xor	%r11d, %r11d
	mov	%r11, InLen(%arg2)	   # ctx_data.in_length = 0
	mov	%r11, PBlockLen(%arg2)	   # ctx_data.partial_block_length = 0
	mov	%r11, PBlockEncKey(%arg2)  # ctx_data.partial_block_enc_key = 0
					   # (64-bit store: low 8 bytes only)
	mov	\Iv, %rax
	movdqu	(%rax), %xmm0
	movdqu	%xmm0, OrigIV(%arg2)	   # ctx_data.orig_IV = iv

	movdqa	SHUF_MASK(%rip), %xmm2
	pshufb	%xmm2, %xmm0
	movdqu	%xmm0, CurCount(%arg2)	   # ctx_data.current_counter = iv

	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
	movdqu	HashKey(%arg2), %xmm13

	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
	%xmm4, %xmm5, %xmm6
.endm

# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
# struct has been initialized by GCM_INIT.
# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
# Clobbers rax, r10-r13, and xmm0-xmm15
#
# operation  enc or dec (tested with .ifc and pasted into helper macro names)
# Register roles: %arg2 = context, %arg3 = output buffer, %arg4 = input
# buffer, %arg5 = byte count (modified), %r11 = running data offset,
# %r13 = bytes still to process, %xmm8 = running hash, %xmm0 = counter block.
.macro GCM_ENC_DEC operation
	movdqu	AadHash(%arg2), %xmm8
	movdqu	HashKey(%arg2), %xmm13
	add	%arg5, InLen(%arg2)

	xor	%r11d, %r11d	# initialise the data pointer offset as zero
	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation

	sub	%r11, %arg5	# sub partial block data used
	mov	%arg5, %r13	# save the number of bytes

	and	$-16, %r13	# %r13 = %r13 - (%r13 mod 16)
	mov	%r13, %r12
	# Encrypt/Decrypt first few blocks

	and	$(3<<4), %r12	# %r12 = 16 * (number of whole blocks mod 4)
	jz	.L_initial_num_blocks_is_0_\@
	cmp	$(2<<4), %r12
	jb	.L_initial_num_blocks_is_1_\@
	je	.L_initial_num_blocks_is_2_\@
.L_initial_num_blocks_is_3_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
	%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
	sub	$48, %r13
	jmp	.L_initial_blocks_\@
.L_initial_num_blocks_is_2_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
	%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
	sub	$32, %r13
	jmp	.L_initial_blocks_\@
.L_initial_num_blocks_is_1_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
	%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
	sub	$16, %r13
	jmp	.L_initial_blocks_\@
.L_initial_num_blocks_is_0_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
	%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
.L_initial_blocks_\@:

	# Main loop - Encrypt/Decrypt remaining blocks

	test	%r13, %r13
	je	.L_zero_cipher_left_\@
	sub	$64, %r13
	je	.L_four_cipher_left_\@
.L_crypt_by_4_\@:
	GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \
	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
	%xmm7, %xmm8, enc
	add	$64, %r11
	sub	$64, %r13
	jne	.L_crypt_by_4_\@
.L_four_cipher_left_\@:
	GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
	%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
.L_zero_cipher_left_\@:
	movdqu	%xmm8, AadHash(%arg2)
	movdqu	%xmm0, CurCount(%arg2)

	mov	%arg5, %r13
	and	$15, %r13			# %r13 = arg5 (mod 16)
	je	.L_multiple_of_16_bytes_\@

	mov	%r13, PBlockLen(%arg2)

	# Handle the last <16 Byte block separately
	paddd	ONE(%rip), %xmm0		# INCR CNT to get Yn
	movdqu	%xmm0, CurCount(%arg2)
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm0

	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1	# Encrypt(K, Yn)
	movdqu	%xmm0, PBlockEncKey(%arg2)

	cmp	$16, %arg5
	jae	.L_large_enough_update_\@	# byte count: unsigned compare

	# Fewer than 16 bytes in total: read them one piece at a time so that
	# nothing outside the input buffer is touched.
	lea	(%arg4,%r11,1), %r10
	mov	%r13, %r12
	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
	jmp	.L_data_read_\@

.L_large_enough_update_\@:
	# Load the 16 bytes that end at the end of the input, then shift the
	# wanted tail down to the bottom of the register.
	sub	$16, %r11
	add	%r13, %r11

	# receive the last <16 Byte block
	movdqu	(%arg4, %r11, 1), %xmm1

	sub	%r13, %r11
	add	$16, %r11

	lea	SHIFT_MASK+16(%rip), %r12
	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
	# (r13 is the number of bytes in plaintext mod 16)
	sub	%r13, %r12
	# get the appropriate shuffle mask
	movdqu	(%r12), %xmm2
	# shift right 16-r13 bytes
	pshufb	%xmm2, %xmm1

.L_data_read_\@:
	lea	ALL_F+16(%rip), %r12
	sub	%r13, %r12

.ifc \operation, dec
	movdqa	%xmm1, %xmm2			# keep the ciphertext for GHASH
.endif
	pxor	%xmm1, %xmm0			# XOR Encrypt(K, Yn)
	movdqu	(%r12), %xmm1
	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
	pand	%xmm1, %xmm0			# mask out top 16-r13 bytes of xmm0
.ifc \operation, dec
	pand	%xmm1, %xmm2
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm2

	pxor	%xmm2, %xmm8
.else
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm0

	pxor	%xmm0, %xmm8
.endif

	movdqu	%xmm8, AadHash(%arg2)
.ifc \operation, enc
	# GHASH computation for the last <16 byte block
	movdqa	SHUF_MASK(%rip), %xmm10
	# shuffle xmm0 back to output as ciphertext
	pshufb	%xmm10, %xmm0
.endif

	# Output %r13 bytes
	movq	%xmm0, %rax
	cmp	$8, %r13
	jle	.L_less_than_8_bytes_left_\@
	mov	%rax, (%arg3 , %r11, 1)
	add	$8, %r11
	psrldq	$8, %xmm0
	movq	%xmm0, %rax
	sub	$8, %r13
.L_less_than_8_bytes_left_\@:
	mov	%al, (%arg3, %r11, 1)
	add	$1, %r11
	shr	$8, %rax
	sub	$1, %r13
	jne	.L_less_than_8_bytes_left_\@
.L_multiple_of_16_bytes_\@:
.endm

# GCM_COMPLETE Finishes update of tag of last partial block
# Output: Authorization Tag (AUTH_TAG)
# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
#
# AUTHTAG     operand holding the address the tag is written to
# AUTHTAGLEN  operand holding the number of tag bytes to write
# NOTE(review): the store sequence below writes at least 4 bytes unless the
# length is exactly 16; confirm callers never pass a length below 4.
.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
	movdqu	AadHash(%arg2), %xmm8
	movdqu	HashKey(%arg2), %xmm13

	mov	PBlockLen(%arg2), %r12

	test	%r12, %r12
	je	.L_partial_done\@

	# A partial block is still pending: fold it into the hash now.
	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6

.L_partial_done\@:
	mov	AadLen(%arg2), %r12	# %r12 = aadLen (number of bytes)
	shl	$3, %r12		# convert into number of bits
	movq	%r12, %xmm15		# len(A) in %xmm15, all 64 bits
					# (movd of %r12d dropped the upper
					# half of the bit count)
	mov	InLen(%arg2), %r12
	shl	$3, %r12		# len(C) in bits
	movq	%r12, %xmm1

	pslldq	$8, %xmm15		# %xmm15 = len(A)||0x0000000000000000
	pxor	%xmm1, %xmm15		# %xmm15 = len(A)||len(C)
	pxor	%xmm15, %xmm8
	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
	# final GHASH computation
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm8

	movdqu	OrigIV(%arg2), %xmm0	# %xmm0 = Y0
	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1	# E(K, Y0)
	pxor	%xmm8, %xmm0
.L_return_T_\@:
	mov	\AUTHTAG, %r10		# %r10 = authTag
	mov	\AUTHTAGLEN, %r11	# %r11 = auth_tag_len
	cmp	$16, %r11
	je	.L_T_16_\@
	cmp	$8, %r11
	jl	.L_T_4_\@
.L_T_8_\@:
	movq	%xmm0, %rax
	mov	%rax, (%r10)
	add	$8, %r10
	sub	$8, %r11
	psrldq	$8, %xmm0
	test	%r11, %r11
	je	.L_return_T_done_\@
.L_T_4_\@:
	movd	%xmm0, %eax
	mov	%eax, (%r10)
	add	$4, %r10
	sub	$4, %r11
	psrldq	$4, %xmm0
	test	%r11, %r11
	je	.L_return_T_done_\@
.L_T_123_\@:
	movd	%xmm0, %eax
	cmp	$2, %r11
	jl	.L_T_1_\@
	mov	%ax, (%r10)
	cmp	$2, %r11
	je	.L_return_T_done_\@
	add	$2, %r10
	sar	$16, %eax
.L_T_1_\@:
	mov	%al, (%r10)
	jmp	.L_return_T_done_\@
.L_T_16_\@:
	movdqu	%xmm0, (%r10)
.L_return_T_done_\@:
.endm

#ifdef __x86_64__
/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
*
*
* Input: A and B (128-bits each, bit-reflected)
* Output: C = A*B*x mod poly, (i.e. >>1 )
* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
*
* GH is both input and output; HK is only read; TMP1-TMP5 are overwritten.
*/
.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
	movdqa	\GH, \TMP1
	pshufd	$78, \GH, \TMP2
	pshufd	$78, \HK, \TMP3
	pxor	\GH, \TMP2		# TMP2 = a1+a0
	pxor	\HK, \TMP3		# TMP3 = b1+b0
	pclmulqdq $0x11, \HK, \TMP1	# TMP1 = a1*b1
	pclmulqdq $0x00, \HK, \GH	# GH = a0*b0
	pclmulqdq $0x00, \TMP3, \TMP2	# TMP2 = (a0+a1)*(b1+b0)
	pxor	\GH, \TMP2
	pxor	\TMP1, \TMP2		# TMP2 = (a0*b1)+(a1*b0)
	movdqa	\TMP2, \TMP3
	pslldq	$8, \TMP3		# left shift TMP3 2 DWs
	psrldq	$8, \TMP2		# right shift TMP2 2 DWs
	pxor	\TMP3, \GH
	pxor	\TMP2, \TMP1		# TMP1:GH holds the result of GH*HK

	# first phase of the reduction

	movdqa	\GH, \TMP2
	movdqa	\GH, \TMP3
	movdqa	\GH, \TMP4		# copy GH into TMP2, TMP3 and TMP4
					# in order to perform
					# independent shifts
	pslld	$31, \TMP2		# packed left shift <<31
	pslld	$30, \TMP3		# packed left shift <<30
	pslld	$25, \TMP4		# packed left shift <<25
	pxor	\TMP3, \TMP2		# xor the shifted versions
	pxor	\TMP4, \TMP2
	movdqa	\TMP2, \TMP5
	psrldq	$4, \TMP5		# right shift TMP5 1 DW
	pslldq	$12, \TMP2		# left shift TMP2 3 DWs
	pxor	\TMP2, \GH

	# second phase of the reduction

	movdqa	\GH, \TMP2		# copy GH into TMP2, TMP3 and TMP4
					# in order to perform
					# independent shifts
	movdqa	\GH, \TMP3
	movdqa	\GH, \TMP4
	psrld	$1, \TMP2		# packed right shift >>1
	psrld	$2, \TMP3		# packed right shift >>2
	psrld	$7, \TMP4		# packed right shift >>7
	pxor	\TMP3, \TMP2		# xor the shifted versions
	pxor	\TMP4, \TMP2
	pxor	\TMP5, \TMP2
	pxor	\TMP2, \GH
	pxor	\TMP1, \GH		# result is in GH
.endm

# Reads DLEN bytes starting at DPTR and stores in XMMDst
# where 0 < DLEN < 16
# Clobbers %rax, DLEN and XMM1
#
# Bytes are fetched one at a time from the highest address downwards (after
# one 8-byte load when DLEN >= 8), so nothing outside DPTR[0..DLEN-1] is read.
# The unread upper bytes of XMMDst end up zero.
.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
	cmp	$8, \DLEN
	jl	.L_read_lt8_\@
	mov	(\DPTR), %rax
	movq	%rax, \XMMDst		# low 8 bytes in one load
	sub	$8, \DLEN
	jz	.L_done_read_partial_block_\@
	xor	%eax, %eax
.L_read_next_byte_\@:
	shl	$8, %rax
	mov	7(\DPTR, \DLEN, 1), %al	# byte at index 8 + (DLEN - 1)
	dec	\DLEN
	jnz	.L_read_next_byte_\@
	movq	%rax, \XMM1
	pslldq	$8, \XMM1		# move into the high qword
	por	\XMM1, \XMMDst
	jmp	.L_done_read_partial_block_\@
.L_read_lt8_\@:
	xor	%eax, %eax
.L_read_next_byte_lt8_\@:
	shl	$8, %rax
	mov	-1(\DPTR, \DLEN, 1), %al	# byte at index DLEN - 1
	dec	\DLEN
	jnz	.L_read_next_byte_lt8_\@
	movq	%rax, \XMMDst
.L_done_read_partial_block_\@:
.endm

# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted.
# Clobbers rax (through READ_PARTIAL_BLOCK), r10, r11, xmm14 and TMP1-TMP7
#
# HASHKEY    xmm register holding the hash key passed on to GHASH_MUL
# AAD        operand holding the address of the data to hash
# AADLEN     operand holding its length in bytes (compared as unsigned)
# TMP1-TMP7  scratch xmm registers; TMP6 carries the running hash
# The result is stored at AadHash(%arg2).
.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
	TMP6 TMP7
	MOVADQ	SHUF_MASK(%rip), %xmm14
	mov	\AAD, %r10		# %r10 = AAD
	mov	\AADLEN, %r11		# %r11 = aadLen
	pxor	\TMP7, \TMP7
	pxor	\TMP6, \TMP6		# running hash starts at zero

	cmp	$16, %r11
	jb	.L_get_AAD_rest\@	# fewer than 16 bytes in total
.L_get_AAD_blocks\@:
	movdqu	(%r10), \TMP7
	pshufb	%xmm14, \TMP7		# byte-reflect the AAD data
	pxor	\TMP7, \TMP6
	GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
	add	$16, %r10
	sub	$16, %r11
	cmp	$16, %r11
	jae	.L_get_AAD_blocks\@	# another whole block is available

	movdqa	\TMP6, \TMP7

	/* read the last <16B of AAD */
.L_get_AAD_rest\@:
	test	%r11, %r11
	je	.L_get_AAD_done\@

	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
	pshufb	%xmm14, \TMP7		# byte-reflect the AAD data
	pxor	\TMP6, \TMP7
	GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
	movdqa	\TMP7, \TMP6

.L_get_AAD_done\@:
	movdqu	\TMP6, AadHash(%arg2)
.endm

# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
# between update calls.
# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
#
# Operands:
#   CYPH_PLAIN_OUT  - output buffer address
#   PLAIN_CYPH_IN   - input buffer address
#   PLAIN_CYPH_LEN  - number of input bytes
#   DATA_OFFSET     - register with the current offset into both buffers;
#                     advanced by the number of bytes written
#   AAD_HASH        - xmm register with the running hash; also stored to
#                     AadHash(%arg2)
#   operation       - dec selects the decrypt variant, anything else the
#                     encrypt variant
# Does nothing when PBlockLen(%arg2) is zero.
.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
	AAD_HASH operation
	mov	PBlockLen(%arg2), %r13
	test	%r13, %r13
	je	.L_partial_block_done_\@	# Leave Macro if no partial blocks
	# Read in input data without over reading
	cmp	$16, \PLAIN_CYPH_LEN
	jl	.L_fewer_than_16_bytes_\@
	# At least 16 bytes available: load a whole block.  The load honours
	# DATA_OFFSET so that both read paths and the stores below address
	# the buffers the same way.
	movups	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %xmm1
	jmp	.L_data_read_\@

.L_fewer_than_16_bytes_\@:
	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
	mov	\PLAIN_CYPH_LEN, %r12
	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1

	mov	PBlockLen(%arg2), %r13

.L_data_read_\@:				# Finished reading in data

	movdqu	PBlockEncKey(%arg2), %xmm9
	movdqu	HashKey(%arg2), %xmm13

	lea	SHIFT_MASK(%rip), %r12

	# adjust the shuffle mask pointer to be able to shift r13 bytes
	# (r13 = PBlockLen, presumably the bytes of the current block that
	# were already handled by an earlier call)
	add	%r13, %r12
	movdqu	(%r12), %xmm2		# get the appropriate shuffle mask
	pshufb	%xmm2, %xmm9		# shift right r13 bytes

.ifc \operation, dec
	movdqa	%xmm1, %xmm3		# keep the input (ciphertext) for the hash
	pxor	%xmm1, %xmm9		# Cyphertext XOR E(K, Yn)

	mov	\PLAIN_CYPH_LEN, %r10
	add	%r13, %r10
	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
	sub	$16, %r10
	# Determine whether the partial block is still not being filled and
	# shift the mask accordingly
	jge	.L_no_extra_mask_1_\@
	sub	%r10, %r12
.L_no_extra_mask_1_\@:

	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
	# get the appropriate mask to mask out bottom r13 bytes of xmm9
	pand	%xmm1, %xmm9		# mask out bottom r13 bytes of xmm9

	pand	%xmm1, %xmm3
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm3
	pshufb	%xmm2, %xmm3
	pxor	%xmm3, \AAD_HASH

	test	%r10, %r10
	jl	.L_partial_incomplete_1_\@

	# GHASH computation for the last <16 Byte block
	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
	xor	%eax, %eax

	mov	%rax, PBlockLen(%arg2)	# block complete: reset PBlockLen
	jmp	.L_dec_done_\@
.L_partial_incomplete_1_\@:
	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
.L_dec_done_\@:
	movdqu	\AAD_HASH, AadHash(%arg2)
.else
	pxor	%xmm1, %xmm9		# Plaintext XOR E(K, Yn)

	mov	\PLAIN_CYPH_LEN, %r10
	add	%r13, %r10
	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
	sub	$16, %r10
	# Determine whether the partial block is still not being filled and
	# shift the mask accordingly
	jge	.L_no_extra_mask_2_\@
	sub	%r10, %r12
.L_no_extra_mask_2_\@:

	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
	# get the appropriate mask to mask out bottom r13 bytes of xmm9
	pand	%xmm1, %xmm9

	movdqa	SHUF_MASK(%rip), %xmm1
	pshufb	%xmm1, %xmm9
	pshufb	%xmm2, %xmm9
	pxor	%xmm9, \AAD_HASH

	test	%r10, %r10
	jl	.L_partial_incomplete_2_\@

	# GHASH computation for the last <16 Byte block
	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
	xor	%eax, %eax

	mov	%rax, PBlockLen(%arg2)	# block complete: reset PBlockLen
	jmp	.L_encode_done_\@
.L_partial_incomplete_2_\@:
	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
.L_encode_done_\@:
	movdqu	\AAD_HASH, AadHash(%arg2)

	movdqa	SHUF_MASK(%rip), %xmm10
	# shuffle xmm9 back to output as ciphertext
	pshufb	%xmm10, %xmm9
	pshufb	%xmm2, %xmm9
.endif
	# output encrypted Bytes
	test	%r10, %r10
	jl	.L_partial_fill_\@
	mov	%r13, %r12
	mov	$16, %r13
	# Set r13 to be the number of bytes to write out
	sub	%r12, %r13
	jmp	.L_count_set_\@
.L_partial_fill_\@:
	mov	\PLAIN_CYPH_LEN, %r13
.L_count_set_\@:
	movdqa	%xmm9, %xmm0
	movq	%xmm0, %rax
	cmp	$8, %r13
	jle	.L_less_than_8_bytes_left_\@

	# more than 8 bytes to write: store the low quadword in one go
	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
	add	$8, \DATA_OFFSET
	psrldq	$8, %xmm0
	movq	%xmm0, %rax
	sub	$8, %r13
.L_less_than_8_bytes_left_\@:
	# store the remaining r13 bytes one at a time, lowest byte first
	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
	add	$1, \DATA_OFFSET
	shr	$8, %rax
	sub	$1, %r13
	jne	.L_less_than_8_bytes_left_\@
.L_partial_block_done_\@:
.endm # PARTIAL_BLOCK

/*
* if a = number of total plaintext bytes
*	b = floor(a/16)
*	num_initial_blocks = b mod 4
*	encrypt the initial num_initial_blocks blocks and apply ghash on
*	the ciphertext
* %r10, %r11, %r12, %rax, %xmm5, %xmm6,
* %xmm7, %xmm8, %xmm9 registers
* are clobbered (plus %xmm14 and the TMP/XMM operands written below)
* %arg1, %arg2, %arg3, %arg4 are used as pointers only, not modified
*
* Operands of INITIAL_BLOCKS_ENC_DEC, as used by the code below:
*	TMP1, TMP2, TMP4, TMP5	xmm scratch registers
*	TMP3		handed to GHASH_MUL as its second operand, so the
*			caller presumably passes the hash key in it - confirm
*	XMM0		counter block, loaded from CurCount(%arg2)
*	XMM1-XMM4	receive the first group of four blocks; the AES round
*			loop addresses them as %xmm1-%xmm4 directly
*	XMMDst		xmm register XORed into XMM1 at the very end
*	TMP6, TMP7	not referenced by this macro
*	i		number of the xmm register that receives AadHash
*	i_seq		digits of the xmm registers holding the initial blocks
*	operation	dec selects decryption, anything else encryption
* %r11 is the data offset; it is advanced past every block processed.
* %r13 is compared against 64 to decide whether a group of four full blocks
* follows - it is set up by the caller, presumably to the remaining length.
*/


.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
	XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
	MOVADQ	SHUF_MASK(%rip), %xmm14		# byte-reversal shuffle mask

	movdqu	AadHash(%arg2), %xmm\i		# xmm<i> = running hash

	# start AES for num_initial_blocks blocks

	movdqu	CurCount(%arg2), \XMM0		# XMM0 = Y0

.if (\i == 5) || (\i == 6) || (\i == 7)

	MOVADQ	ONE(%RIP),\TMP1
	MOVADQ	0(%arg1),\TMP2			# round key 0
.irpc index, \i_seq
	paddd	\TMP1, \XMM0			# INCR Y0
.ifc \operation, dec
	movdqa	\XMM0, %xmm\index
.else
	MOVADQ	\XMM0, %xmm\index
.endif
	pshufb	%xmm14, %xmm\index		# perform a 16 byte swap
	pxor	\TMP2, %xmm\index
.endr
	lea	0x10(%arg1),%r10
	mov	keysize,%eax
	shr	$2,%eax				# 128->4, 192->6, 256->8
	add	$5,%eax				# 128->9, 192->11, 256->13

.Laes_loop_initial_\@:
	MOVADQ	(%r10),\TMP1
.irpc	index, \i_seq
	aesenc	\TMP1, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
	jnz	.Laes_loop_initial_\@

	MOVADQ	(%r10), \TMP1
.irpc index, \i_seq
	aesenclast \TMP1, %xmm\index		# Last Round
.endr
.irpc index, \i_seq
	movdqu	(%arg4 , %r11, 1), \TMP1
	pxor	\TMP1, %xmm\index
	movdqu	%xmm\index, (%arg3 , %r11, 1)
	# write back plaintext/ciphertext for num_initial_blocks
	add	$16, %r11

.ifc \operation, dec
	movdqa	\TMP1, %xmm\index		# hash the input (ciphertext)
.endif
	pshufb	%xmm14, %xmm\index

	# prepare plaintext/ciphertext for GHASH computation
.endr
.endif

	# apply GHASH on num_initial_blocks blocks

.if \i == 5
	pxor	%xmm5, %xmm6
	GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	pxor	%xmm6, %xmm7
	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	pxor	%xmm7, %xmm8
	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 6
	pxor	%xmm6, %xmm7
	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	pxor	%xmm7, %xmm8
	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 7
	pxor	%xmm7, %xmm8
	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.endif
	cmp	$64, %r13
	jl	.L_initial_blocks_done\@
	# fewer than 64 in %r13: the four-block step below is skipped
/*
*
* Encrypt/decrypt the first group of four full blocks.
* No HashKey precomputation is performed in this macro.
* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
*/
	MOVADQ	ONE(%RIP),\TMP1
	paddd	\TMP1, \XMM0			# INCR Y0
	MOVADQ	\XMM0, \XMM1
	pshufb	%xmm14, \XMM1			# perform a 16 byte swap

	paddd	\TMP1, \XMM0			# INCR Y0
	MOVADQ	\XMM0, \XMM2
	pshufb	%xmm14, \XMM2			# perform a 16 byte swap

	paddd	\TMP1, \XMM0			# INCR Y0
	MOVADQ	\XMM0, \XMM3
	pshufb	%xmm14, \XMM3			# perform a 16 byte swap

	paddd	\TMP1, \XMM0			# INCR Y0
	MOVADQ	\XMM0, \XMM4
	pshufb	%xmm14, \XMM4			# perform a 16 byte swap

	MOVADQ	0(%arg1),\TMP1
	pxor	\TMP1, \XMM1
	pxor	\TMP1, \XMM2
	pxor	\TMP1, \XMM3
	pxor	\TMP1, \XMM4
.irpc index, 1234 # do 4 rounds
	movaps	0x10*\index(%arg1), \TMP1
	aesenc	\TMP1, \XMM1
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
.endr
.irpc index, 56789 # do next 5 rounds
	movaps	0x10*\index(%arg1), \TMP1
	aesenc	\TMP1, \XMM1
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
.endr
	lea	0xa0(%arg1),%r10
	mov	keysize,%eax
	shr	$2,%eax				# 128->4, 192->6, 256->8
	sub	$4,%eax				# 128->0, 192->2, 256->4
	jz	.Laes_loop_pre_done\@

.Laes_loop_pre_\@:
	MOVADQ	(%r10),\TMP2
.irpc	index, 1234
	aesenc	\TMP2, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
	jnz	.Laes_loop_pre_\@

.Laes_loop_pre_done\@:
	MOVADQ	(%r10), \TMP2
	aesenclast \TMP2, \XMM1
	aesenclast \TMP2, \XMM2
	aesenclast \TMP2, \XMM3
	aesenclast \TMP2, \XMM4
	movdqu	16*0(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM1
.ifc \operation, dec
	movdqu	\XMM1, 16*0(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM1
.endif
	movdqu	16*1(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM2
.ifc \operation, dec
	movdqu	\XMM2, 16*1(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM2
.endif
	movdqu	16*2(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM3
.ifc \operation, dec
	movdqu	\XMM3, 16*2(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM3
.endif
	movdqu	16*3(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM4
.ifc \operation, dec
	movdqu	\XMM4, 16*3(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM4
.else
	movdqu	\XMM1, 16*0(%arg3 , %r11 , 1)
	movdqu	\XMM2, 16*1(%arg3 , %r11 , 1)
	movdqu	\XMM3, 16*2(%arg3 , %r11 , 1)
	movdqu	\XMM4, 16*3(%arg3 , %r11 , 1)
.endif

	add	$64, %r11
	pshufb	%xmm14, \XMM1			# perform a 16 byte swap
	pxor	\XMMDst, \XMM1
# combine GHASHed value with the corresponding ciphertext
	pshufb	%xmm14, \XMM2			# perform a 16 byte swap
	pshufb	%xmm14, \XMM3			# perform a 16 byte swap
	pshufb	%xmm14, \XMM4			# perform a 16 byte swap

.L_initial_blocks_done\@:

.endm

/*
* encrypt 4 blocks at a time
* ghash the 4 previously encrypted ciphertext blocks
* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation

/*
 * Body of GHASH_4_ENCRYPT_4_PARALLEL_enc (macro header directly above).
 *
 * On entry XMM1-XMM4 hold the four blocks still to be hashed and XMM0 the
 * counter block.  The blocks are copied to XMM5-XMM8 and multiplied by
 * HashKey_4, HashKey_3, HashKey_2 and HashKey (Karatsuba, using the *_k
 * values) while XMM1-XMM4 are reloaded with the next four counter values
 * and run through AES with the round keys at (%arg1).  The two jobs are
 * interleaved instruction by instruction, so statement order matters.
 *
 * Input is read from (%arg4,%r11) and output written to (%arg3,%r11);
 * %r11 itself is not advanced here.  On exit XMM1-XMM4 hold the byte-swapped
 * output blocks, with the reduced hash (XMM5) XORed into XMM1.
 *
 * Clobbers %r10, %eax, %xmm15, TMP1-TMP6 and XMM5-XMM8.  The extra-round
 * loop names %xmm1-%xmm4 directly, so XMM1-XMM4 must be those registers.
 */
	movdqa	\XMM1, \XMM5
	movdqa	\XMM2, \XMM6
	movdqa	\XMM3, \XMM7
	movdqa	\XMM4, \XMM8

	movdqa	SHUF_MASK(%rip), %xmm15
	# multiply XMM5 * HashKey_4 using karatsuba

	movdqa	\XMM5, \TMP4
	pshufd	$78, \XMM5, \TMP6
	pxor	\XMM5, \TMP6
	paddd	ONE(%rip), \XMM0		# INCR CNT
	movdqu	HashKey_4(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP4		# TMP4 = a1*b1
	movdqa	\XMM0, \XMM1
	paddd	ONE(%rip), \XMM0		# INCR CNT
	movdqa	\XMM0, \XMM2
	paddd	ONE(%rip), \XMM0		# INCR CNT
	movdqa	\XMM0, \XMM3
	paddd	ONE(%rip), \XMM0		# INCR CNT
	movdqa	\XMM0, \XMM4
	pshufb	%xmm15, \XMM1			# perform a 16 byte swap
	pclmulqdq $0x00, \TMP5, \XMM5		# XMM5 = a0*b0
	pshufb	%xmm15, \XMM2			# perform a 16 byte swap
	pshufb	%xmm15, \XMM3			# perform a 16 byte swap
	pshufb	%xmm15, \XMM4			# perform a 16 byte swap

	pxor	(%arg1), \XMM1
	pxor	(%arg1), \XMM2
	pxor	(%arg1), \XMM3
	pxor	(%arg1), \XMM4
	movdqu	HashKey_4_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP6		# TMP6 = (a1+a0)*(b1+b0)
	movaps	0x10(%arg1), \TMP1
	aesenc	\TMP1, \XMM1			# Round 1
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
	movaps	0x20(%arg1), \TMP1
	aesenc	\TMP1, \XMM1			# Round 2
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
	movdqa	\XMM6, \TMP1
	pshufd	$78, \XMM6, \TMP2
	pxor	\XMM6, \TMP2
	movdqu	HashKey_3(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		# TMP1 = a1 * b1
	movaps	0x30(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			# Round 3
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM6		# XMM6 = a0*b0
	movaps	0x40(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			# Round 4
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	movdqu	HashKey_3_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		# TMP2 = (a1+a0)*(b1+b0)
	movaps	0x50(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			# Round 5
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pxor	\TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
	pxor	\XMM6, \XMM5
	pxor	\TMP2, \TMP6
	movdqa	\XMM7, \TMP1
	pshufd	$78, \XMM7, \TMP2
	pxor	\XMM7, \TMP2
	movdqu	HashKey_2(%arg2), \TMP5

	# Multiply XMM7 * HashKey_2 using karatsuba

	pclmulqdq $0x11, \TMP5, \TMP1		# TMP1 = a1*b1
	movaps	0x60(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			# Round 6
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM7		# XMM7 = a0*b0
	movaps	0x70(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			# Round 7
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	movdqu	HashKey_2_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		# TMP2 = (a1+a0)*(b1+b0)
	movaps	0x80(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			# Round 8
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pxor	\TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
	pxor	\XMM7, \XMM5
	pxor	\TMP2, \TMP6

	# Multiply XMM8 * HashKey
	# XMM8 and TMP5 hold the values for the two operands

	movdqa	\XMM8, \TMP1
	pshufd	$78, \XMM8, \TMP2
	pxor	\XMM8, \TMP2
	movdqu	HashKey(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		# TMP1 = a1*b1
	movaps	0x90(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			# Round 9
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM8		# XMM8 = a0*b0
	lea	0xa0(%arg1),%r10
	mov	keysize,%eax
	shr	$2,%eax				# 128->4, 192->6, 256->8
	sub	$4,%eax				# 128->0, 192->2, 256->4
	jz	.Laes_loop_par_enc_done\@

.Laes_loop_par_enc\@:
	MOVADQ	(%r10),\TMP3
.irpc	index, 1234
	aesenc	\TMP3, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
	jnz	.Laes_loop_par_enc\@

.Laes_loop_par_enc_done\@:
	MOVADQ	(%r10), \TMP3
	aesenclast \TMP3, \XMM1			# last round (10, 12 or 14)
	aesenclast \TMP3, \XMM2
	aesenclast \TMP3, \XMM3
	aesenclast \TMP3, \XMM4
	movdqu	HashKey_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		# TMP2 = (a1+a0)*(b1+b0)
	movdqu	(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM1			# Ciphertext/Plaintext XOR EK
	movdqu	16(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM2			# Ciphertext/Plaintext XOR EK
	movdqu	32(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM3			# Ciphertext/Plaintext XOR EK
	movdqu	48(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM4			# Ciphertext/Plaintext XOR EK
	movdqu	\XMM1, (%arg3,%r11,1)		# Write to the ciphertext buffer
	movdqu	\XMM2, 16(%arg3,%r11,1)		# Write to the ciphertext buffer
	movdqu	\XMM3, 32(%arg3,%r11,1)		# Write to the ciphertext buffer
	movdqu	\XMM4, 48(%arg3,%r11,1)		# Write to the ciphertext buffer
	pshufb	%xmm15, \XMM1			# perform a 16 byte swap
	pshufb	%xmm15, \XMM2			# perform a 16 byte swap
	pshufb	%xmm15, \XMM3			# perform a 16 byte swap
	pshufb	%xmm15, \XMM4			# perform a 16 byte swap

	pxor	\TMP4, \TMP1
	pxor	\XMM8, \XMM5
	pxor	\TMP6, \TMP2
	pxor	\TMP1, \TMP2
	pxor	\XMM5, \TMP2
	movdqa	\TMP2, \TMP3
	pslldq	$8, \TMP3			# left shift TMP3 2 DWs
	psrldq	$8, \TMP2			# right shift TMP2 2 DWs
	pxor	\TMP3, \XMM5
	pxor	\TMP2, \TMP1			# accumulate the results in TMP1:XMM5

	# first phase of reduction

	movdqa	\XMM5, \TMP2
	movdqa	\XMM5, \TMP3
	movdqa	\XMM5, \TMP4
# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
	pslld	$31, \TMP2			# packed left shift << 31
	pslld	$30, \TMP3			# packed left shift << 30
	pslld	$25, \TMP4			# packed left shift << 25
	pxor	\TMP3, \TMP2			# xor the shifted versions
	pxor	\TMP4, \TMP2
	movdqa	\TMP2, \TMP5
	psrldq	$4, \TMP5			# right shift T5 1 DW
	pslldq	$12, \TMP2			# left shift T2 3 DWs
	pxor	\TMP2, \XMM5

	# second phase of reduction

	movdqa	\XMM5,\TMP2			# make 3 copies of XMM5 into TMP2, TMP3, TMP4
	movdqa	\XMM5,\TMP3
	movdqa	\XMM5,\TMP4
	psrld	$1, \TMP2			# packed right shift >> 1
	psrld	$2, \TMP3			# packed right shift >> 2
	psrld	$7, \TMP4			# packed right shift >> 7
	pxor	\TMP3,\TMP2			# xor the shifted versions
	pxor	\TMP4,\TMP2
	pxor	\TMP5, \TMP2
	pxor	\TMP2, \XMM5
	pxor	\TMP1, \XMM5			# result is in XMM5

	pxor	\XMM5, \XMM1
.endm

/*
* decrypt 4 blocks at a time
* ghash the 4 previously decrypted ciphertext blocks
* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation

	movdqa	\XMM1, \XMM5
	movdqa	\XMM2, \XMM6
	movdqa	\XMM3, \XMM7
	movdqa	\XMM4, \XMM8

	movdqa	SHUF_MASK(%rip), %xmm15
	# multiply TMP5 * HashKey using karatsuba

	movdqa	\XMM5,
\TMP4 119662306a36Sopenharmony_ci pshufd $78, \XMM5, \TMP6 119762306a36Sopenharmony_ci pxor \XMM5, \TMP6 119862306a36Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 119962306a36Sopenharmony_ci movdqu HashKey_4(%arg2), \TMP5 120062306a36Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 120162306a36Sopenharmony_ci movdqa \XMM0, \XMM1 120262306a36Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 120362306a36Sopenharmony_ci movdqa \XMM0, \XMM2 120462306a36Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 120562306a36Sopenharmony_ci movdqa \XMM0, \XMM3 120662306a36Sopenharmony_ci paddd ONE(%rip), \XMM0 # INCR CNT 120762306a36Sopenharmony_ci movdqa \XMM0, \XMM4 120862306a36Sopenharmony_ci pshufb %xmm15, \XMM1 # perform a 16 byte swap 120962306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 121062306a36Sopenharmony_ci pshufb %xmm15, \XMM2 # perform a 16 byte swap 121162306a36Sopenharmony_ci pshufb %xmm15, \XMM3 # perform a 16 byte swap 121262306a36Sopenharmony_ci pshufb %xmm15, \XMM4 # perform a 16 byte swap 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci pxor (%arg1), \XMM1 121562306a36Sopenharmony_ci pxor (%arg1), \XMM2 121662306a36Sopenharmony_ci pxor (%arg1), \XMM3 121762306a36Sopenharmony_ci pxor (%arg1), \XMM4 121862306a36Sopenharmony_ci movdqu HashKey_4_k(%arg2), \TMP5 121962306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) 122062306a36Sopenharmony_ci movaps 0x10(%arg1), \TMP1 122162306a36Sopenharmony_ci aesenc \TMP1, \XMM1 # Round 1 122262306a36Sopenharmony_ci aesenc \TMP1, \XMM2 122362306a36Sopenharmony_ci aesenc \TMP1, \XMM3 122462306a36Sopenharmony_ci aesenc \TMP1, \XMM4 122562306a36Sopenharmony_ci movaps 0x20(%arg1), \TMP1 122662306a36Sopenharmony_ci aesenc \TMP1, \XMM1 # Round 2 122762306a36Sopenharmony_ci aesenc \TMP1, \XMM2 122862306a36Sopenharmony_ci aesenc \TMP1, \XMM3 122962306a36Sopenharmony_ci aesenc \TMP1, \XMM4 123062306a36Sopenharmony_ci movdqa \XMM6, \TMP1 123162306a36Sopenharmony_ci 
pshufd $78, \XMM6, \TMP2 123262306a36Sopenharmony_ci pxor \XMM6, \TMP2 123362306a36Sopenharmony_ci movdqu HashKey_3(%arg2), \TMP5 123462306a36Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 123562306a36Sopenharmony_ci movaps 0x30(%arg1), \TMP3 123662306a36Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 3 123762306a36Sopenharmony_ci aesenc \TMP3, \XMM2 123862306a36Sopenharmony_ci aesenc \TMP3, \XMM3 123962306a36Sopenharmony_ci aesenc \TMP3, \XMM4 124062306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 124162306a36Sopenharmony_ci movaps 0x40(%arg1), \TMP3 124262306a36Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 4 124362306a36Sopenharmony_ci aesenc \TMP3, \XMM2 124462306a36Sopenharmony_ci aesenc \TMP3, \XMM3 124562306a36Sopenharmony_ci aesenc \TMP3, \XMM4 124662306a36Sopenharmony_ci movdqu HashKey_3_k(%arg2), \TMP5 124762306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 124862306a36Sopenharmony_ci movaps 0x50(%arg1), \TMP3 124962306a36Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 5 125062306a36Sopenharmony_ci aesenc \TMP3, \XMM2 125162306a36Sopenharmony_ci aesenc \TMP3, \XMM3 125262306a36Sopenharmony_ci aesenc \TMP3, \XMM4 125362306a36Sopenharmony_ci pxor \TMP1, \TMP4 125462306a36Sopenharmony_ci# accumulate the results in TMP4:XMM5, TMP6 holds the middle part 125562306a36Sopenharmony_ci pxor \XMM6, \XMM5 125662306a36Sopenharmony_ci pxor \TMP2, \TMP6 125762306a36Sopenharmony_ci movdqa \XMM7, \TMP1 125862306a36Sopenharmony_ci pshufd $78, \XMM7, \TMP2 125962306a36Sopenharmony_ci pxor \XMM7, \TMP2 126062306a36Sopenharmony_ci movdqu HashKey_2(%arg2), \TMP5 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci # Multiply TMP5 * HashKey using karatsuba 126362306a36Sopenharmony_ci 126462306a36Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 126562306a36Sopenharmony_ci movaps 0x60(%arg1), \TMP3 126662306a36Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 6 126762306a36Sopenharmony_ci aesenc \TMP3, \XMM2 
126862306a36Sopenharmony_ci aesenc \TMP3, \XMM3 126962306a36Sopenharmony_ci aesenc \TMP3, \XMM4 127062306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 127162306a36Sopenharmony_ci movaps 0x70(%arg1), \TMP3 127262306a36Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 7 127362306a36Sopenharmony_ci aesenc \TMP3, \XMM2 127462306a36Sopenharmony_ci aesenc \TMP3, \XMM3 127562306a36Sopenharmony_ci aesenc \TMP3, \XMM4 127662306a36Sopenharmony_ci movdqu HashKey_2_k(%arg2), \TMP5 127762306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 127862306a36Sopenharmony_ci movaps 0x80(%arg1), \TMP3 127962306a36Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 8 128062306a36Sopenharmony_ci aesenc \TMP3, \XMM2 128162306a36Sopenharmony_ci aesenc \TMP3, \XMM3 128262306a36Sopenharmony_ci aesenc \TMP3, \XMM4 128362306a36Sopenharmony_ci pxor \TMP1, \TMP4 128462306a36Sopenharmony_ci# accumulate the results in TMP4:XMM5, TMP6 holds the middle part 128562306a36Sopenharmony_ci pxor \XMM7, \XMM5 128662306a36Sopenharmony_ci pxor \TMP2, \TMP6 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci # Multiply XMM8 * HashKey 128962306a36Sopenharmony_ci # XMM8 and TMP5 hold the values for the two operands 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci movdqa \XMM8, \TMP1 129262306a36Sopenharmony_ci pshufd $78, \XMM8, \TMP2 129362306a36Sopenharmony_ci pxor \XMM8, \TMP2 129462306a36Sopenharmony_ci movdqu HashKey(%arg2), \TMP5 129562306a36Sopenharmony_ci pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 129662306a36Sopenharmony_ci movaps 0x90(%arg1), \TMP3 129762306a36Sopenharmony_ci aesenc \TMP3, \XMM1 # Round 9 129862306a36Sopenharmony_ci aesenc \TMP3, \XMM2 129962306a36Sopenharmony_ci aesenc \TMP3, \XMM3 130062306a36Sopenharmony_ci aesenc \TMP3, \XMM4 130162306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 130262306a36Sopenharmony_ci lea 0xa0(%arg1),%r10 130362306a36Sopenharmony_ci mov keysize,%eax 130462306a36Sopenharmony_ci shr $2,%eax # 128->4, 
192->6, 256->8 130562306a36Sopenharmony_ci sub $4,%eax # 128->0, 192->2, 256->4 130662306a36Sopenharmony_ci jz .Laes_loop_par_dec_done\@ 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci.Laes_loop_par_dec\@: 130962306a36Sopenharmony_ci MOVADQ (%r10),\TMP3 131062306a36Sopenharmony_ci.irpc index, 1234 131162306a36Sopenharmony_ci aesenc \TMP3, %xmm\index 131262306a36Sopenharmony_ci.endr 131362306a36Sopenharmony_ci add $16,%r10 131462306a36Sopenharmony_ci sub $1,%eax 131562306a36Sopenharmony_ci jnz .Laes_loop_par_dec\@ 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci.Laes_loop_par_dec_done\@: 131862306a36Sopenharmony_ci MOVADQ (%r10), \TMP3 131962306a36Sopenharmony_ci aesenclast \TMP3, \XMM1 # last round 132062306a36Sopenharmony_ci aesenclast \TMP3, \XMM2 132162306a36Sopenharmony_ci aesenclast \TMP3, \XMM3 132262306a36Sopenharmony_ci aesenclast \TMP3, \XMM4 132362306a36Sopenharmony_ci movdqu HashKey_k(%arg2), \TMP5 132462306a36Sopenharmony_ci pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) 132562306a36Sopenharmony_ci movdqu (%arg4,%r11,1), \TMP3 132662306a36Sopenharmony_ci pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK 132762306a36Sopenharmony_ci movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer 132862306a36Sopenharmony_ci movdqa \TMP3, \XMM1 132962306a36Sopenharmony_ci movdqu 16(%arg4,%r11,1), \TMP3 133062306a36Sopenharmony_ci pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK 133162306a36Sopenharmony_ci movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer 133262306a36Sopenharmony_ci movdqa \TMP3, \XMM2 133362306a36Sopenharmony_ci movdqu 32(%arg4,%r11,1), \TMP3 133462306a36Sopenharmony_ci pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK 133562306a36Sopenharmony_ci movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer 133662306a36Sopenharmony_ci movdqa \TMP3, \XMM3 133762306a36Sopenharmony_ci movdqu 48(%arg4,%r11,1), \TMP3 133862306a36Sopenharmony_ci pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK 133962306a36Sopenharmony_ci movdqu 
\XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer 134062306a36Sopenharmony_ci movdqa \TMP3, \XMM4 134162306a36Sopenharmony_ci pshufb %xmm15, \XMM1 # perform a 16 byte swap 134262306a36Sopenharmony_ci pshufb %xmm15, \XMM2 # perform a 16 byte swap 134362306a36Sopenharmony_ci pshufb %xmm15, \XMM3 # perform a 16 byte swap 134462306a36Sopenharmony_ci pshufb %xmm15, \XMM4 # perform a 16 byte swap 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_ci pxor \TMP4, \TMP1 134762306a36Sopenharmony_ci pxor \XMM8, \XMM5 134862306a36Sopenharmony_ci pxor \TMP6, \TMP2 134962306a36Sopenharmony_ci pxor \TMP1, \TMP2 135062306a36Sopenharmony_ci pxor \XMM5, \TMP2 135162306a36Sopenharmony_ci movdqa \TMP2, \TMP3 135262306a36Sopenharmony_ci pslldq $8, \TMP3 # left shift TMP3 2 DWs 135362306a36Sopenharmony_ci psrldq $8, \TMP2 # right shift TMP2 2 DWs 135462306a36Sopenharmony_ci pxor \TMP3, \XMM5 135562306a36Sopenharmony_ci pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 135662306a36Sopenharmony_ci 135762306a36Sopenharmony_ci # first phase of reduction 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci movdqa \XMM5, \TMP2 136062306a36Sopenharmony_ci movdqa \XMM5, \TMP3 136162306a36Sopenharmony_ci movdqa \XMM5, \TMP4 136262306a36Sopenharmony_ci# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently 136362306a36Sopenharmony_ci pslld $31, \TMP2 # packed right shift << 31 136462306a36Sopenharmony_ci pslld $30, \TMP3 # packed right shift << 30 136562306a36Sopenharmony_ci pslld $25, \TMP4 # packed right shift << 25 136662306a36Sopenharmony_ci pxor \TMP3, \TMP2 # xor the shifted versions 136762306a36Sopenharmony_ci pxor \TMP4, \TMP2 136862306a36Sopenharmony_ci movdqa \TMP2, \TMP5 136962306a36Sopenharmony_ci psrldq $4, \TMP5 # right shift T5 1 DW 137062306a36Sopenharmony_ci pslldq $12, \TMP2 # left shift T2 3 DWs 137162306a36Sopenharmony_ci pxor \TMP2, \XMM5 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci # second phase of reduction 
# make 3 copies of XMM5 (in TMP2, TMP3, TMP4) so each can be shifted independently
	movdqa	  \XMM5, \TMP2
	movdqa	  \XMM5, \TMP3
	movdqa	  \XMM5, \TMP4
	psrld	  $1, \TMP2		# packed (per-dword) right shift >> 1
	psrld	  $2, \TMP3		# packed (per-dword) right shift >> 2
	psrld	  $7, \TMP4		# packed (per-dword) right shift >> 7
	pxor	  \TMP3, \TMP2		# xor the shifted versions
	pxor	  \TMP4, \TMP2
	pxor	  \TMP5, \TMP2		# fold in the dword saved in TMP5 by the first phase
	pxor	  \TMP2, \XMM5
	pxor	  \TMP1, \XMM5		# reduced result is now in XMM5

	pxor	  \XMM5, \XMM1		# xor the reduced result into XMM1
.endm

/*
 * GHASH the last 4 ciphertext blocks.
 *
 * In:      XMM1..XMM4  the four blocks; XMM1 is multiplied by HashKey_4,
 *                      XMM2 by HashKey_3, XMM3 by HashKey_2 and XMM4 by
 *                      HashKey, all loaded from the context at (%arg2).
 * Out:     XMMDst      the four products xored together and reduced to
 *                      128 bits.
 * Clobber: TMP1..TMP7 and XMM1..XMM4 (XMM1 doubles as the accumulator for
 *          the Karatsuba middle terms).
 *
 * Every product uses three pclmulqdq: high halves (a1*b1), low halves
 * (a0*b0) and a middle term (a1+a0)*(b1+b0) whose second operand comes from
 * the matching HashKey_*_k slot.
 * NOTE(review): the HashKey_*_k slots presumably hold the pre-xored halves
 * of the corresponding hash key power; confirm where they are written.
 */
.macro	GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst

	# Multiply XMM1 * HashKey_4 (using Karatsuba)

	movdqa	  \XMM1, \TMP6
	pshufd	  $78, \XMM1, \TMP2	# swap the two 64-bit halves of XMM1
	pxor	  \XMM1, \TMP2		# both halves of TMP2 = a1+a0
	movdqu	  HashKey_4(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP6	# TMP6 = a1*b1
	pclmulqdq $0x00, \TMP5, \XMM1	# XMM1 = a0*b0
	movdqu	  HashKey_4_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2	# TMP2 = (a1+a0)*(b1+b0)
	movdqa	  \XMM1, \XMMDst
	movdqa	  \TMP2, \XMM1		# result in TMP6 (high), XMMDst (low), XMM1 (middle)

	# Multiply XMM2 * HashKey_3 (using Karatsuba)

	movdqa	  \XMM2, \TMP1
	pshufd	  $78, \XMM2, \TMP2
	pxor	  \XMM2, \TMP2
	movdqu	  HashKey_3(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1	# TMP1 = a1*b1
	pclmulqdq $0x00, \TMP5, \XMM2	# XMM2 = a0*b0
	movdqu	  HashKey_3_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2	# TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM2, \XMMDst
	pxor	  \TMP2, \XMM1
# results accumulated in TMP6, XMMDst, XMM1

	# Multiply XMM3 * HashKey_2 (using Karatsuba)

	movdqa	  \XMM3, \TMP1
	pshufd	  $78, \XMM3, \TMP2
	pxor	  \XMM3, \TMP2
	movdqu	  HashKey_2(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1	# TMP1 = a1*b1
	pclmulqdq $0x00, \TMP5, \XMM3	# XMM3 = a0*b0
	movdqu	  HashKey_2_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2	# TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM3, \XMMDst
	pxor	  \TMP2, \XMM1		# results accumulated in TMP6, XMMDst, XMM1

	# Multiply XMM4 * HashKey (using Karatsuba)
	movdqa	  \XMM4, \TMP1
	pshufd	  $78, \XMM4, \TMP2
	pxor	  \XMM4, \TMP2
	movdqu	  HashKey(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1	# TMP1 = a1*b1
	pclmulqdq $0x00, \TMP5, \XMM4	# XMM4 = a0*b0
	movdqu	  HashKey_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2	# TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM4, \XMMDst
	pxor	  \XMM1, \TMP2
	pxor	  \TMP6, \TMP2
	pxor	  \XMMDst, \TMP2
	# middle section of the temp results combined as in karatsuba algorithm
	movdqa	  \TMP2, \TMP4
	pslldq	  $8, \TMP4		# left shift TMP4 2 DWs
	psrldq	  $8, \TMP2		# right shift TMP2 2 DWs
	pxor	  \TMP4, \XMMDst
	pxor	  \TMP2, \TMP6
# TMP6:XMMDst holds the result of the accumulated carry-less multiplications
	# first phase of the reduction
	movdqa	  \XMMDst, \TMP2
	movdqa	  \XMMDst, \TMP3
	movdqa	  \XMMDst, \TMP4
# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
	pslld	  $31, \TMP2		# packed (per-dword) left shift << 31
	pslld	  $30, \TMP3		# packed (per-dword) left shift << 30
	pslld	  $25, \TMP4		# packed (per-dword) left shift << 25
	pxor	  \TMP3, \TMP2		# xor the shifted versions
	pxor	  \TMP4, \TMP2
	movdqa	  \TMP2, \TMP7
	psrldq	  $4, \TMP7		# right shift TMP7 1 DW
	pslldq	  $12, \TMP2		# left shift TMP2 3 DWs
	pxor	  \TMP2, \XMMDst

	# second phase of the reduction
	movdqa	  \XMMDst, \TMP2
	# make 3 copies of XMMDst for doing 3 shift operations
	movdqa	  \XMMDst, \TMP3
	movdqa	  \XMMDst, \TMP4
	psrld	  $1, \TMP2		# packed (per-dword) right shift >> 1
	psrld	  $2, \TMP3		# packed (per-dword) right shift >> 2
	psrld	  $7, \TMP4		# packed (per-dword) right shift >> 7
	pxor	  \TMP3, \TMP2		# xor the shifted versions
	pxor	  \TMP4, \TMP2
	pxor	  \TMP7, \TMP2
	pxor	  \TMP2, \XMMDst
	pxor	  \TMP6, \XMMDst	# reduced result is in XMMDst
.endm


/*
 * Encryption of a single block.
 *
 * In:      XMM0     block to encrypt
 *          %arg1    expanded key schedule; round key 0 is at offset 0 and
 *                   the following round keys are 16 bytes apart
 *          keysize  key length in bytes; it sets the number of rounds
 * Out:     XMM0     encrypted block
 * Clobber: eax, r10, TMP1, flags
 *
 * The loop issues keysize/4 + 5 aesenc rounds (9, 11 or 13) and is followed
 * by one aesenclast that uses the next round key.
 */

.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1

	pxor		(%arg1), \XMM0		# initial whitening with round key 0
	mov		keysize,%eax
	shr		$2,%eax			# 128->4, 192->6, 256->8
	add		$5,%eax			# 128->9, 192->11, 256->13
	lea		16(%arg1), %r10		# get first expanded key address

	/*
	 * The loop label carries the .L prefix so that expansions of this
	 * macro add no symbols to the object file symbol table.
	 */
.L_esb_loop_\@:
	MOVADQ		(%r10),\TMP1
	aesenc		\TMP1,\XMM0
	add		$16,%r10
	sub		$1,%eax
	jnz		.L_esb_loop_\@

	MOVADQ		(%r10),\TMP1
	aesenclast	\TMP1,\XMM0
.endm
/*****************************************************************************
* void aesni_gcm_dec(void *aes_ctx,    // AES Key schedule. Starts on a 16 byte boundary.
*                  struct gcm_context_data *data,
*                                     // Context data
*                  u8 *out,           // Plaintext output. Decrypt in-place is allowed.
*                  const u8 *in,      // Ciphertext input
*                  u64 plaintext_len, // Length of data in bytes for decryption.
*                  u8 *iv,            // Pre-counter block j0: 4 byte salt (from Security Association)
*                                     // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
*                                     // concatenated with 0x00000001. 16-byte aligned pointer.
*                  u8 *hash_subkey,   // H, the Hash sub key input. Data starts on a 16-byte boundary.
*                  const u8 *aad,     // Additional Authentication Data (AAD)
*                  u64 aad_len,       // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
*                  u8 *auth_tag,      // Authenticated Tag output. The driver will compare this to the
*                                     // given authentication tag and only return the plaintext if they match.
*                  u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16
*                                     // (most likely), 12 or 8.
*
* Assumptions:
*
* keys:
*       keys are pre-expanded and aligned to 16 bytes. we are using the first
*       set of 11 keys in the data structure void *aes_ctx
*
* iv:
*        0                   1                   2                   3
*        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                      Salt (From the SA)                       |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                     Initialization Vector                     |
*       |        (This is the sequence number from IPSec header)        |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                              0x1                              |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*
*
* AAD:
*       AAD padded to 128 bits with 0
*       for example, assume AAD is a u32 vector
*
*       if AAD is 8 bytes:
*       AAD[2] = {A0, A1};
*       padded AAD in xmm register = {A1 A0 0 0}
*
*        0                   1                   2                   3
*        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                           SPI (A1)                            |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                  32-bit Sequence Number (A0)                  |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                              0x0                              |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*                    AAD Format with 32-bit Sequence Number
*
*       if AAD is 12 bytes:
*       AAD[3] = {A0, A1, A2};
*       padded AAD in xmm register = {A2 A1 A0 0}
*
*        0                   1                   2                   3
*        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                           SPI (A2)                            |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |            64-bit Extended Sequence Number {A1,A0}            |
*       |                                                               |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                              0x0                              |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*               AAD Format with 64-bit Extended Sequence Number
*
* poly = x^128 + x^127 + x^126 + x^121 + 1
*
*****************************************************************************/
SYM_FUNC_START(aesni_gcm_dec)
	FUNC_SAVE

	/*
	 * Following the parameter order documented above, GCM_INIT is handed
	 * iv, hash_subkey, aad and aad_len, and GCM_COMPLETE is handed
	 * auth_tag and auth_tag_len. "dec" selects the decrypt flavour of
	 * GCM_ENC_DEC.
	 * NOTE(review): arg7..arg11 are written without a '%' prefix, so they
	 * presumably name stack slots rather than registers; confirm at
	 * their definition.
	 */
	GCM_INIT %arg6, arg7, arg8, arg9
	GCM_ENC_DEC dec
	GCM_COMPLETE arg10, arg11
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_dec)


/*****************************************************************************
* void aesni_gcm_enc(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
*                    struct gcm_context_data *data,
*                                        // Context data
*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
*                    const u8 *in,       // Plaintext input
*                    u64 plaintext_len,  // Length of data in bytes for encryption.
*                    u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
*                                        // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
*                                        // concatenated with 0x00000001. 16-byte aligned pointer.
*                    u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
*                    const u8 *aad,      // Additional Authentication Data (AAD)
*                    u64 aad_len,        // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
*                    u8 *auth_tag,       // Authenticated Tag output.
*                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
*                                        // 12 or 8.
*
* Assumptions:
*
* keys:
*       keys are pre-expanded and aligned to 16 bytes.
*       we are using the first set of 11 keys in the data structure
*       void *aes_ctx
*
*
* iv:
*        0                   1                   2                   3
*        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                      Salt (From the SA)                       |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                     Initialization Vector                     |
*       |        (This is the sequence number from IPSec header)        |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                              0x1                              |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*
*
* AAD:
*       AAD padded to 128 bits with 0
*       for example, assume AAD is a u32 vector
*
*       if AAD is 8 bytes:
*       AAD[2] = {A0, A1};
*       padded AAD in xmm register = {A1 A0 0 0}
*
*        0                   1                   2                   3
*        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                           SPI (A1)                            |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                  32-bit Sequence Number (A0)                  |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                              0x0                              |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*                    AAD Format with 32-bit Sequence Number
*
*       if AAD is 12 bytes:
*       AAD[3] = {A0, A1, A2};
*       padded AAD in xmm register = {A2 A1 A0 0}
*
*        0                   1                   2                   3
*        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                           SPI (A2)                            |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |            64-bit Extended Sequence Number {A1,A0}            |
*       |                                                               |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*       |                              0x0                              |
*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*               AAD Format with 64-bit Extended Sequence Number
*
* poly = x^128 + x^127 + x^126 + x^121 + 1
***************************************************************************/
SYM_FUNC_START(aesni_gcm_enc)
	FUNC_SAVE

	/*
	 * Following the parameter order documented above, GCM_INIT is handed
	 * iv, hash_subkey, aad and aad_len, and GCM_COMPLETE is handed
	 * auth_tag and auth_tag_len. "enc" selects the encrypt flavour of
	 * GCM_ENC_DEC.
	 * NOTE(review): arg7..arg11 are written without a '%' prefix, so they
	 * presumably name stack slots rather than registers; confirm at
	 * their definition.
	 */
	GCM_INIT %arg6, arg7, arg8, arg9
	GCM_ENC_DEC enc

	GCM_COMPLETE arg10, arg11
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_enc)

/*****************************************************************************
* void aesni_gcm_init(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
*                     struct gcm_context_data *data,
*                                         // context data
*                     u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
*                                         // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
*                                         // concatenated with 0x00000001. 16-byte aligned pointer.
*                     u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
*                     const u8 *aad,      // Additional Authentication Data (AAD)
*                     u64 aad_len);       // Length of AAD in bytes.
*
* Runs only the GCM_INIT step; no data is processed and no tag is produced
* here.
*/
SYM_FUNC_START(aesni_gcm_init)
	FUNC_SAVE
	/* iv, hash_subkey, aad, aad_len: parameters 3 to 6 of the signature above */
	GCM_INIT %arg3, %arg4,%arg5, %arg6
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_init)

/*****************************************************************************
* void aesni_gcm_enc_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
*                    struct gcm_context_data *data,
*                                        // context data
*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
*                    const u8 *in,       // Plaintext input
*                    u64 plaintext_len); // Length of data in bytes for encryption.
*/
SYM_FUNC_START(aesni_gcm_enc_update)
	FUNC_SAVE
	/*
	 * Only the encrypt flavour of GCM_ENC_DEC runs here; this entry point
	 * invokes neither GCM_INIT nor GCM_COMPLETE.
	 */
	GCM_ENC_DEC enc
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_enc_update)

/*****************************************************************************
* void aesni_gcm_dec_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
*                    struct gcm_context_data *data,
*                                        // context data
*                    u8 *out,            // Plaintext output. Decrypt in-place is allowed.
*                    const u8 *in,       // Ciphertext input
*                    u64 plaintext_len); // Length of data in bytes for decryption.
*/
SYM_FUNC_START(aesni_gcm_dec_update)
	FUNC_SAVE
	/*
	 * Only the decrypt flavour of GCM_ENC_DEC runs here; this entry point
	 * invokes neither GCM_INIT nor GCM_COMPLETE.
	 */
	GCM_ENC_DEC dec
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_dec_update)

/*****************************************************************************
* void aesni_gcm_finalize(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
*                    struct gcm_context_data *data,
*                                        // context data
*                    u8 *auth_tag,       // Authenticated Tag output.
*                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
*                                        // 12 or 8.
*/
SYM_FUNC_START(aesni_gcm_finalize)
	FUNC_SAVE
	/* auth_tag, auth_tag_len: parameters 3 and 4 of the signature above */
	GCM_COMPLETE %arg3, %arg4
	FUNC_RESTORE
	RET
SYM_FUNC_END(aesni_gcm_finalize)

#endif

/*
 * Key expansion helpers.
 *
 * Each helper takes the aeskeygenassist result in %xmm1, produces the next
 * round key material, stores it at (TKEYP) and advances TKEYP past what it
 * stored.
 *
 * The two shufps/pxor pairs shared by all four helpers turn the dwords
 * {x0, x1, x2, x3} of the key register into their running xor
 * {x0, x0^x1, x0^x1^x2, x0^x1^x2^x3}. They only work while the low dword of
 * %xmm4 is zero; the helpers leave that dword untouched, so the caller has
 * to zero %xmm4 once before the first call.
 */

/*
 * _key_expansion_256a (also reachable as _key_expansion_128)
 * In/out: %xmm0 = previous round key, replaced by the new one
 * In:     %xmm1 = aeskeygenassist result; its dword 3 is broadcast and used
 * Clobber: %xmm1, %xmm4, flags
 */
SYM_FUNC_START_LOCAL(_key_expansion_256a)
	shufps	$0x10, %xmm0, %xmm4	# 0b00010000
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4	# 0b10001100
	pxor	%xmm4, %xmm0
	pshufd	$0xff, %xmm1, %xmm1	# 0b11111111: broadcast dword 3
	pxor	%xmm1, %xmm0
	movaps	%xmm0, (TKEYP)
	add	$0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)

/*
 * _key_expansion_192a
 * In/out: %xmm0 = low 128 bits of the key state, %xmm2 = the remaining state
 * In:     %xmm1 = aeskeygenassist result; its dword 1 is broadcast and used
 * Stores 32 bytes: {old xmm2[0..1], new xmm0[0..1]} at (TKEYP) and
 * {new xmm0[2..3], new xmm2[0..1]} at 0x10(TKEYP).
 * Clobber: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6, flags
 */
SYM_FUNC_START_LOCAL(_key_expansion_192a)
	shufps	$0x10, %xmm0, %xmm4	# 0b00010000
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4	# 0b10001100
	pxor	%xmm4, %xmm0
	pshufd	$0x55, %xmm1, %xmm1	# 0b01010101: broadcast dword 1
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm6		# keep the old xmm2 for the first store
	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3	# broadcast dword 3 of the new xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	shufps	$0x44, %xmm0, %xmm6	# 0b01000100
	movaps	%xmm6, (TKEYP)
	movaps	%xmm0, %xmm1
	shufps	$0x4e, %xmm2, %xmm1	# 0b01001110
	movaps	%xmm1, 0x10(TKEYP)
	add	$0x20, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192a)

/*
 * _key_expansion_192b
 * Same update of %xmm0 and %xmm2 as _key_expansion_192a, but only the new
 * %xmm0 (16 bytes) is stored.
 * Clobber: %xmm1, %xmm3, %xmm4, %xmm5, flags
 */
SYM_FUNC_START_LOCAL(_key_expansion_192b)
	shufps	$0x10, %xmm0, %xmm4	# 0b00010000
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4	# 0b10001100
	pxor	%xmm4, %xmm0
	pshufd	$0x55, %xmm1, %xmm1	# 0b01010101: broadcast dword 1
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3	# broadcast dword 3 of the new xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, (TKEYP)
	add	$0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192b)

/*
 * _key_expansion_256b
 * In/out: %xmm2 = previous round key of this half, replaced by the new one
 * In:     %xmm1 = aeskeygenassist result; its dword 2 is broadcast and used
 * Clobber: %xmm1, %xmm4, flags
 */
SYM_FUNC_START_LOCAL(_key_expansion_256b)
	shufps	$0x10, %xmm2, %xmm4	# 0b00010000
	pxor	%xmm4, %xmm2
	shufps	$0x8c, %xmm2, %xmm4	# 0b10001100
	pxor	%xmm4, %xmm2
	pshufd	$0xaa, %xmm1, %xmm1	# 0b10101010: broadcast dword 2
	pxor	%xmm1, %xmm2
	movaps	%xmm2, (TKEYP)
	add	$0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256b)

/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
*/ 182662306a36Sopenharmony_ciSYM_FUNC_START(aesni_set_key) 182762306a36Sopenharmony_ci FRAME_BEGIN 182862306a36Sopenharmony_ci#ifndef __x86_64__ 182962306a36Sopenharmony_ci pushl KEYP 183062306a36Sopenharmony_ci movl (FRAME_OFFSET+8)(%esp), KEYP # ctx 183162306a36Sopenharmony_ci movl (FRAME_OFFSET+12)(%esp), UKEYP # in_key 183262306a36Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), %edx # key_len 183362306a36Sopenharmony_ci#endif 183462306a36Sopenharmony_ci movups (UKEYP), %xmm0 # user key (first 16 bytes) 183562306a36Sopenharmony_ci movaps %xmm0, (KEYP) 183662306a36Sopenharmony_ci lea 0x10(KEYP), TKEYP # key addr 183762306a36Sopenharmony_ci movl %edx, 480(KEYP) 183862306a36Sopenharmony_ci pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x 183962306a36Sopenharmony_ci cmp $24, %dl 184062306a36Sopenharmony_ci jb .Lenc_key128 184162306a36Sopenharmony_ci je .Lenc_key192 184262306a36Sopenharmony_ci movups 0x10(UKEYP), %xmm2 # other user key 184362306a36Sopenharmony_ci movaps %xmm2, (TKEYP) 184462306a36Sopenharmony_ci add $0x10, TKEYP 184562306a36Sopenharmony_ci aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 184662306a36Sopenharmony_ci call _key_expansion_256a 184762306a36Sopenharmony_ci aeskeygenassist $0x1, %xmm0, %xmm1 184862306a36Sopenharmony_ci call _key_expansion_256b 184962306a36Sopenharmony_ci aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 185062306a36Sopenharmony_ci call _key_expansion_256a 185162306a36Sopenharmony_ci aeskeygenassist $0x2, %xmm0, %xmm1 185262306a36Sopenharmony_ci call _key_expansion_256b 185362306a36Sopenharmony_ci aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 185462306a36Sopenharmony_ci call _key_expansion_256a 185562306a36Sopenharmony_ci aeskeygenassist $0x4, %xmm0, %xmm1 185662306a36Sopenharmony_ci call _key_expansion_256b 185762306a36Sopenharmony_ci aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 185862306a36Sopenharmony_ci call _key_expansion_256a 185962306a36Sopenharmony_ci aeskeygenassist $0x8, %xmm0, %xmm1 186062306a36Sopenharmony_ci call 
_key_expansion_256b 186162306a36Sopenharmony_ci aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 186262306a36Sopenharmony_ci call _key_expansion_256a 186362306a36Sopenharmony_ci aeskeygenassist $0x10, %xmm0, %xmm1 186462306a36Sopenharmony_ci call _key_expansion_256b 186562306a36Sopenharmony_ci aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 186662306a36Sopenharmony_ci call _key_expansion_256a 186762306a36Sopenharmony_ci aeskeygenassist $0x20, %xmm0, %xmm1 186862306a36Sopenharmony_ci call _key_expansion_256b 186962306a36Sopenharmony_ci aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 187062306a36Sopenharmony_ci call _key_expansion_256a 187162306a36Sopenharmony_ci jmp .Ldec_key 187262306a36Sopenharmony_ci.Lenc_key192: 187362306a36Sopenharmony_ci movq 0x10(UKEYP), %xmm2 # other user key 187462306a36Sopenharmony_ci aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 187562306a36Sopenharmony_ci call _key_expansion_192a 187662306a36Sopenharmony_ci aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 187762306a36Sopenharmony_ci call _key_expansion_192b 187862306a36Sopenharmony_ci aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 187962306a36Sopenharmony_ci call _key_expansion_192a 188062306a36Sopenharmony_ci aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 188162306a36Sopenharmony_ci call _key_expansion_192b 188262306a36Sopenharmony_ci aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 188362306a36Sopenharmony_ci call _key_expansion_192a 188462306a36Sopenharmony_ci aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 188562306a36Sopenharmony_ci call _key_expansion_192b 188662306a36Sopenharmony_ci aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 188762306a36Sopenharmony_ci call _key_expansion_192a 188862306a36Sopenharmony_ci aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 188962306a36Sopenharmony_ci call _key_expansion_192b 189062306a36Sopenharmony_ci jmp .Ldec_key 189162306a36Sopenharmony_ci.Lenc_key128: 189262306a36Sopenharmony_ci aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 189362306a36Sopenharmony_ci call 
_key_expansion_128 189462306a36Sopenharmony_ci aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 189562306a36Sopenharmony_ci call _key_expansion_128 189662306a36Sopenharmony_ci aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 189762306a36Sopenharmony_ci call _key_expansion_128 189862306a36Sopenharmony_ci aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 189962306a36Sopenharmony_ci call _key_expansion_128 190062306a36Sopenharmony_ci aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 190162306a36Sopenharmony_ci call _key_expansion_128 190262306a36Sopenharmony_ci aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 190362306a36Sopenharmony_ci call _key_expansion_128 190462306a36Sopenharmony_ci aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 190562306a36Sopenharmony_ci call _key_expansion_128 190662306a36Sopenharmony_ci aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 190762306a36Sopenharmony_ci call _key_expansion_128 190862306a36Sopenharmony_ci aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 190962306a36Sopenharmony_ci call _key_expansion_128 191062306a36Sopenharmony_ci aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 191162306a36Sopenharmony_ci call _key_expansion_128 191262306a36Sopenharmony_ci.Ldec_key: 191362306a36Sopenharmony_ci sub $0x10, TKEYP 191462306a36Sopenharmony_ci movaps (KEYP), %xmm0 191562306a36Sopenharmony_ci movaps (TKEYP), %xmm1 191662306a36Sopenharmony_ci movaps %xmm0, 240(TKEYP) 191762306a36Sopenharmony_ci movaps %xmm1, 240(KEYP) 191862306a36Sopenharmony_ci add $0x10, KEYP 191962306a36Sopenharmony_ci lea 240-16(TKEYP), UKEYP 192062306a36Sopenharmony_ci.align 4 192162306a36Sopenharmony_ci.Ldec_key_loop: 192262306a36Sopenharmony_ci movaps (KEYP), %xmm0 192362306a36Sopenharmony_ci aesimc %xmm0, %xmm1 192462306a36Sopenharmony_ci movaps %xmm1, (UKEYP) 192562306a36Sopenharmony_ci add $0x10, KEYP 192662306a36Sopenharmony_ci sub $0x10, UKEYP 192762306a36Sopenharmony_ci cmp TKEYP, KEYP 192862306a36Sopenharmony_ci jb .Ldec_key_loop 192962306a36Sopenharmony_ci xor AREG, AREG 
193062306a36Sopenharmony_ci#ifndef __x86_64__ 193162306a36Sopenharmony_ci popl KEYP 193262306a36Sopenharmony_ci#endif 193362306a36Sopenharmony_ci FRAME_END 193462306a36Sopenharmony_ci RET 193562306a36Sopenharmony_ciSYM_FUNC_END(aesni_set_key) 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci/* 193862306a36Sopenharmony_ci * void aesni_enc(const void *ctx, u8 *dst, const u8 *src) 193962306a36Sopenharmony_ci */ 194062306a36Sopenharmony_ciSYM_FUNC_START(aesni_enc) 194162306a36Sopenharmony_ci FRAME_BEGIN 194262306a36Sopenharmony_ci#ifndef __x86_64__ 194362306a36Sopenharmony_ci pushl KEYP 194462306a36Sopenharmony_ci pushl KLEN 194562306a36Sopenharmony_ci movl (FRAME_OFFSET+12)(%esp), KEYP # ctx 194662306a36Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), OUTP # dst 194762306a36Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), INP # src 194862306a36Sopenharmony_ci#endif 194962306a36Sopenharmony_ci movl 480(KEYP), KLEN # key length 195062306a36Sopenharmony_ci movups (INP), STATE # input 195162306a36Sopenharmony_ci call _aesni_enc1 195262306a36Sopenharmony_ci movups STATE, (OUTP) # output 195362306a36Sopenharmony_ci#ifndef __x86_64__ 195462306a36Sopenharmony_ci popl KLEN 195562306a36Sopenharmony_ci popl KEYP 195662306a36Sopenharmony_ci#endif 195762306a36Sopenharmony_ci FRAME_END 195862306a36Sopenharmony_ci RET 195962306a36Sopenharmony_ciSYM_FUNC_END(aesni_enc) 196062306a36Sopenharmony_ci 196162306a36Sopenharmony_ci/* 196262306a36Sopenharmony_ci * _aesni_enc1: internal ABI 196362306a36Sopenharmony_ci * input: 196462306a36Sopenharmony_ci * KEYP: key struct pointer 196562306a36Sopenharmony_ci * KLEN: round count 196662306a36Sopenharmony_ci * STATE: initial state (input) 196762306a36Sopenharmony_ci * output: 196862306a36Sopenharmony_ci * STATE: finial state (output) 196962306a36Sopenharmony_ci * changed: 197062306a36Sopenharmony_ci * KEY 197162306a36Sopenharmony_ci * TKEYP (T1) 197262306a36Sopenharmony_ci */ 197362306a36Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_enc1) 
197462306a36Sopenharmony_ci movaps (KEYP), KEY # key 197562306a36Sopenharmony_ci mov KEYP, TKEYP 197662306a36Sopenharmony_ci pxor KEY, STATE # round 0 197762306a36Sopenharmony_ci add $0x30, TKEYP 197862306a36Sopenharmony_ci cmp $24, KLEN 197962306a36Sopenharmony_ci jb .Lenc128 198062306a36Sopenharmony_ci lea 0x20(TKEYP), TKEYP 198162306a36Sopenharmony_ci je .Lenc192 198262306a36Sopenharmony_ci add $0x20, TKEYP 198362306a36Sopenharmony_ci movaps -0x60(TKEYP), KEY 198462306a36Sopenharmony_ci aesenc KEY, STATE 198562306a36Sopenharmony_ci movaps -0x50(TKEYP), KEY 198662306a36Sopenharmony_ci aesenc KEY, STATE 198762306a36Sopenharmony_ci.align 4 198862306a36Sopenharmony_ci.Lenc192: 198962306a36Sopenharmony_ci movaps -0x40(TKEYP), KEY 199062306a36Sopenharmony_ci aesenc KEY, STATE 199162306a36Sopenharmony_ci movaps -0x30(TKEYP), KEY 199262306a36Sopenharmony_ci aesenc KEY, STATE 199362306a36Sopenharmony_ci.align 4 199462306a36Sopenharmony_ci.Lenc128: 199562306a36Sopenharmony_ci movaps -0x20(TKEYP), KEY 199662306a36Sopenharmony_ci aesenc KEY, STATE 199762306a36Sopenharmony_ci movaps -0x10(TKEYP), KEY 199862306a36Sopenharmony_ci aesenc KEY, STATE 199962306a36Sopenharmony_ci movaps (TKEYP), KEY 200062306a36Sopenharmony_ci aesenc KEY, STATE 200162306a36Sopenharmony_ci movaps 0x10(TKEYP), KEY 200262306a36Sopenharmony_ci aesenc KEY, STATE 200362306a36Sopenharmony_ci movaps 0x20(TKEYP), KEY 200462306a36Sopenharmony_ci aesenc KEY, STATE 200562306a36Sopenharmony_ci movaps 0x30(TKEYP), KEY 200662306a36Sopenharmony_ci aesenc KEY, STATE 200762306a36Sopenharmony_ci movaps 0x40(TKEYP), KEY 200862306a36Sopenharmony_ci aesenc KEY, STATE 200962306a36Sopenharmony_ci movaps 0x50(TKEYP), KEY 201062306a36Sopenharmony_ci aesenc KEY, STATE 201162306a36Sopenharmony_ci movaps 0x60(TKEYP), KEY 201262306a36Sopenharmony_ci aesenc KEY, STATE 201362306a36Sopenharmony_ci movaps 0x70(TKEYP), KEY 201462306a36Sopenharmony_ci aesenclast KEY, STATE 201562306a36Sopenharmony_ci RET 
201662306a36Sopenharmony_ciSYM_FUNC_END(_aesni_enc1) 201762306a36Sopenharmony_ci 201862306a36Sopenharmony_ci/* 201962306a36Sopenharmony_ci * _aesni_enc4: internal ABI 202062306a36Sopenharmony_ci * input: 202162306a36Sopenharmony_ci * KEYP: key struct pointer 202262306a36Sopenharmony_ci * KLEN: round count 202362306a36Sopenharmony_ci * STATE1: initial state (input) 202462306a36Sopenharmony_ci * STATE2 202562306a36Sopenharmony_ci * STATE3 202662306a36Sopenharmony_ci * STATE4 202762306a36Sopenharmony_ci * output: 202862306a36Sopenharmony_ci * STATE1: finial state (output) 202962306a36Sopenharmony_ci * STATE2 203062306a36Sopenharmony_ci * STATE3 203162306a36Sopenharmony_ci * STATE4 203262306a36Sopenharmony_ci * changed: 203362306a36Sopenharmony_ci * KEY 203462306a36Sopenharmony_ci * TKEYP (T1) 203562306a36Sopenharmony_ci */ 203662306a36Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_enc4) 203762306a36Sopenharmony_ci movaps (KEYP), KEY # key 203862306a36Sopenharmony_ci mov KEYP, TKEYP 203962306a36Sopenharmony_ci pxor KEY, STATE1 # round 0 204062306a36Sopenharmony_ci pxor KEY, STATE2 204162306a36Sopenharmony_ci pxor KEY, STATE3 204262306a36Sopenharmony_ci pxor KEY, STATE4 204362306a36Sopenharmony_ci add $0x30, TKEYP 204462306a36Sopenharmony_ci cmp $24, KLEN 204562306a36Sopenharmony_ci jb .L4enc128 204662306a36Sopenharmony_ci lea 0x20(TKEYP), TKEYP 204762306a36Sopenharmony_ci je .L4enc192 204862306a36Sopenharmony_ci add $0x20, TKEYP 204962306a36Sopenharmony_ci movaps -0x60(TKEYP), KEY 205062306a36Sopenharmony_ci aesenc KEY, STATE1 205162306a36Sopenharmony_ci aesenc KEY, STATE2 205262306a36Sopenharmony_ci aesenc KEY, STATE3 205362306a36Sopenharmony_ci aesenc KEY, STATE4 205462306a36Sopenharmony_ci movaps -0x50(TKEYP), KEY 205562306a36Sopenharmony_ci aesenc KEY, STATE1 205662306a36Sopenharmony_ci aesenc KEY, STATE2 205762306a36Sopenharmony_ci aesenc KEY, STATE3 205862306a36Sopenharmony_ci aesenc KEY, STATE4 205962306a36Sopenharmony_ci#.align 4 
206062306a36Sopenharmony_ci.L4enc192: 206162306a36Sopenharmony_ci movaps -0x40(TKEYP), KEY 206262306a36Sopenharmony_ci aesenc KEY, STATE1 206362306a36Sopenharmony_ci aesenc KEY, STATE2 206462306a36Sopenharmony_ci aesenc KEY, STATE3 206562306a36Sopenharmony_ci aesenc KEY, STATE4 206662306a36Sopenharmony_ci movaps -0x30(TKEYP), KEY 206762306a36Sopenharmony_ci aesenc KEY, STATE1 206862306a36Sopenharmony_ci aesenc KEY, STATE2 206962306a36Sopenharmony_ci aesenc KEY, STATE3 207062306a36Sopenharmony_ci aesenc KEY, STATE4 207162306a36Sopenharmony_ci#.align 4 207262306a36Sopenharmony_ci.L4enc128: 207362306a36Sopenharmony_ci movaps -0x20(TKEYP), KEY 207462306a36Sopenharmony_ci aesenc KEY, STATE1 207562306a36Sopenharmony_ci aesenc KEY, STATE2 207662306a36Sopenharmony_ci aesenc KEY, STATE3 207762306a36Sopenharmony_ci aesenc KEY, STATE4 207862306a36Sopenharmony_ci movaps -0x10(TKEYP), KEY 207962306a36Sopenharmony_ci aesenc KEY, STATE1 208062306a36Sopenharmony_ci aesenc KEY, STATE2 208162306a36Sopenharmony_ci aesenc KEY, STATE3 208262306a36Sopenharmony_ci aesenc KEY, STATE4 208362306a36Sopenharmony_ci movaps (TKEYP), KEY 208462306a36Sopenharmony_ci aesenc KEY, STATE1 208562306a36Sopenharmony_ci aesenc KEY, STATE2 208662306a36Sopenharmony_ci aesenc KEY, STATE3 208762306a36Sopenharmony_ci aesenc KEY, STATE4 208862306a36Sopenharmony_ci movaps 0x10(TKEYP), KEY 208962306a36Sopenharmony_ci aesenc KEY, STATE1 209062306a36Sopenharmony_ci aesenc KEY, STATE2 209162306a36Sopenharmony_ci aesenc KEY, STATE3 209262306a36Sopenharmony_ci aesenc KEY, STATE4 209362306a36Sopenharmony_ci movaps 0x20(TKEYP), KEY 209462306a36Sopenharmony_ci aesenc KEY, STATE1 209562306a36Sopenharmony_ci aesenc KEY, STATE2 209662306a36Sopenharmony_ci aesenc KEY, STATE3 209762306a36Sopenharmony_ci aesenc KEY, STATE4 209862306a36Sopenharmony_ci movaps 0x30(TKEYP), KEY 209962306a36Sopenharmony_ci aesenc KEY, STATE1 210062306a36Sopenharmony_ci aesenc KEY, STATE2 210162306a36Sopenharmony_ci aesenc KEY, STATE3 
210262306a36Sopenharmony_ci aesenc KEY, STATE4 210362306a36Sopenharmony_ci movaps 0x40(TKEYP), KEY 210462306a36Sopenharmony_ci aesenc KEY, STATE1 210562306a36Sopenharmony_ci aesenc KEY, STATE2 210662306a36Sopenharmony_ci aesenc KEY, STATE3 210762306a36Sopenharmony_ci aesenc KEY, STATE4 210862306a36Sopenharmony_ci movaps 0x50(TKEYP), KEY 210962306a36Sopenharmony_ci aesenc KEY, STATE1 211062306a36Sopenharmony_ci aesenc KEY, STATE2 211162306a36Sopenharmony_ci aesenc KEY, STATE3 211262306a36Sopenharmony_ci aesenc KEY, STATE4 211362306a36Sopenharmony_ci movaps 0x60(TKEYP), KEY 211462306a36Sopenharmony_ci aesenc KEY, STATE1 211562306a36Sopenharmony_ci aesenc KEY, STATE2 211662306a36Sopenharmony_ci aesenc KEY, STATE3 211762306a36Sopenharmony_ci aesenc KEY, STATE4 211862306a36Sopenharmony_ci movaps 0x70(TKEYP), KEY 211962306a36Sopenharmony_ci aesenclast KEY, STATE1 # last round 212062306a36Sopenharmony_ci aesenclast KEY, STATE2 212162306a36Sopenharmony_ci aesenclast KEY, STATE3 212262306a36Sopenharmony_ci aesenclast KEY, STATE4 212362306a36Sopenharmony_ci RET 212462306a36Sopenharmony_ciSYM_FUNC_END(_aesni_enc4) 212562306a36Sopenharmony_ci 212662306a36Sopenharmony_ci/* 212762306a36Sopenharmony_ci * void aesni_dec (const void *ctx, u8 *dst, const u8 *src) 212862306a36Sopenharmony_ci */ 212962306a36Sopenharmony_ciSYM_FUNC_START(aesni_dec) 213062306a36Sopenharmony_ci FRAME_BEGIN 213162306a36Sopenharmony_ci#ifndef __x86_64__ 213262306a36Sopenharmony_ci pushl KEYP 213362306a36Sopenharmony_ci pushl KLEN 213462306a36Sopenharmony_ci movl (FRAME_OFFSET+12)(%esp), KEYP # ctx 213562306a36Sopenharmony_ci movl (FRAME_OFFSET+16)(%esp), OUTP # dst 213662306a36Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), INP # src 213762306a36Sopenharmony_ci#endif 213862306a36Sopenharmony_ci mov 480(KEYP), KLEN # key length 213962306a36Sopenharmony_ci add $240, KEYP 214062306a36Sopenharmony_ci movups (INP), STATE # input 214162306a36Sopenharmony_ci call _aesni_dec1 214262306a36Sopenharmony_ci movups 
STATE, (OUTP) #output 214362306a36Sopenharmony_ci#ifndef __x86_64__ 214462306a36Sopenharmony_ci popl KLEN 214562306a36Sopenharmony_ci popl KEYP 214662306a36Sopenharmony_ci#endif 214762306a36Sopenharmony_ci FRAME_END 214862306a36Sopenharmony_ci RET 214962306a36Sopenharmony_ciSYM_FUNC_END(aesni_dec) 215062306a36Sopenharmony_ci 215162306a36Sopenharmony_ci/* 215262306a36Sopenharmony_ci * _aesni_dec1: internal ABI 215362306a36Sopenharmony_ci * input: 215462306a36Sopenharmony_ci * KEYP: key struct pointer 215562306a36Sopenharmony_ci * KLEN: key length 215662306a36Sopenharmony_ci * STATE: initial state (input) 215762306a36Sopenharmony_ci * output: 215862306a36Sopenharmony_ci * STATE: finial state (output) 215962306a36Sopenharmony_ci * changed: 216062306a36Sopenharmony_ci * KEY 216162306a36Sopenharmony_ci * TKEYP (T1) 216262306a36Sopenharmony_ci */ 216362306a36Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_dec1) 216462306a36Sopenharmony_ci movaps (KEYP), KEY # key 216562306a36Sopenharmony_ci mov KEYP, TKEYP 216662306a36Sopenharmony_ci pxor KEY, STATE # round 0 216762306a36Sopenharmony_ci add $0x30, TKEYP 216862306a36Sopenharmony_ci cmp $24, KLEN 216962306a36Sopenharmony_ci jb .Ldec128 217062306a36Sopenharmony_ci lea 0x20(TKEYP), TKEYP 217162306a36Sopenharmony_ci je .Ldec192 217262306a36Sopenharmony_ci add $0x20, TKEYP 217362306a36Sopenharmony_ci movaps -0x60(TKEYP), KEY 217462306a36Sopenharmony_ci aesdec KEY, STATE 217562306a36Sopenharmony_ci movaps -0x50(TKEYP), KEY 217662306a36Sopenharmony_ci aesdec KEY, STATE 217762306a36Sopenharmony_ci.align 4 217862306a36Sopenharmony_ci.Ldec192: 217962306a36Sopenharmony_ci movaps -0x40(TKEYP), KEY 218062306a36Sopenharmony_ci aesdec KEY, STATE 218162306a36Sopenharmony_ci movaps -0x30(TKEYP), KEY 218262306a36Sopenharmony_ci aesdec KEY, STATE 218362306a36Sopenharmony_ci.align 4 218462306a36Sopenharmony_ci.Ldec128: 218562306a36Sopenharmony_ci movaps -0x20(TKEYP), KEY 218662306a36Sopenharmony_ci aesdec KEY, STATE 218762306a36Sopenharmony_ci 
movaps -0x10(TKEYP), KEY 218862306a36Sopenharmony_ci aesdec KEY, STATE 218962306a36Sopenharmony_ci movaps (TKEYP), KEY 219062306a36Sopenharmony_ci aesdec KEY, STATE 219162306a36Sopenharmony_ci movaps 0x10(TKEYP), KEY 219262306a36Sopenharmony_ci aesdec KEY, STATE 219362306a36Sopenharmony_ci movaps 0x20(TKEYP), KEY 219462306a36Sopenharmony_ci aesdec KEY, STATE 219562306a36Sopenharmony_ci movaps 0x30(TKEYP), KEY 219662306a36Sopenharmony_ci aesdec KEY, STATE 219762306a36Sopenharmony_ci movaps 0x40(TKEYP), KEY 219862306a36Sopenharmony_ci aesdec KEY, STATE 219962306a36Sopenharmony_ci movaps 0x50(TKEYP), KEY 220062306a36Sopenharmony_ci aesdec KEY, STATE 220162306a36Sopenharmony_ci movaps 0x60(TKEYP), KEY 220262306a36Sopenharmony_ci aesdec KEY, STATE 220362306a36Sopenharmony_ci movaps 0x70(TKEYP), KEY 220462306a36Sopenharmony_ci aesdeclast KEY, STATE 220562306a36Sopenharmony_ci RET 220662306a36Sopenharmony_ciSYM_FUNC_END(_aesni_dec1) 220762306a36Sopenharmony_ci 220862306a36Sopenharmony_ci/* 220962306a36Sopenharmony_ci * _aesni_dec4: internal ABI 221062306a36Sopenharmony_ci * input: 221162306a36Sopenharmony_ci * KEYP: key struct pointer 221262306a36Sopenharmony_ci * KLEN: key length 221362306a36Sopenharmony_ci * STATE1: initial state (input) 221462306a36Sopenharmony_ci * STATE2 221562306a36Sopenharmony_ci * STATE3 221662306a36Sopenharmony_ci * STATE4 221762306a36Sopenharmony_ci * output: 221862306a36Sopenharmony_ci * STATE1: finial state (output) 221962306a36Sopenharmony_ci * STATE2 222062306a36Sopenharmony_ci * STATE3 222162306a36Sopenharmony_ci * STATE4 222262306a36Sopenharmony_ci * changed: 222362306a36Sopenharmony_ci * KEY 222462306a36Sopenharmony_ci * TKEYP (T1) 222562306a36Sopenharmony_ci */ 222662306a36Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_dec4) 222762306a36Sopenharmony_ci movaps (KEYP), KEY # key 222862306a36Sopenharmony_ci mov KEYP, TKEYP 222962306a36Sopenharmony_ci pxor KEY, STATE1 # round 0 223062306a36Sopenharmony_ci pxor KEY, STATE2 
223162306a36Sopenharmony_ci pxor KEY, STATE3 223262306a36Sopenharmony_ci pxor KEY, STATE4 223362306a36Sopenharmony_ci add $0x30, TKEYP 223462306a36Sopenharmony_ci cmp $24, KLEN 223562306a36Sopenharmony_ci jb .L4dec128 223662306a36Sopenharmony_ci lea 0x20(TKEYP), TKEYP 223762306a36Sopenharmony_ci je .L4dec192 223862306a36Sopenharmony_ci add $0x20, TKEYP 223962306a36Sopenharmony_ci movaps -0x60(TKEYP), KEY 224062306a36Sopenharmony_ci aesdec KEY, STATE1 224162306a36Sopenharmony_ci aesdec KEY, STATE2 224262306a36Sopenharmony_ci aesdec KEY, STATE3 224362306a36Sopenharmony_ci aesdec KEY, STATE4 224462306a36Sopenharmony_ci movaps -0x50(TKEYP), KEY 224562306a36Sopenharmony_ci aesdec KEY, STATE1 224662306a36Sopenharmony_ci aesdec KEY, STATE2 224762306a36Sopenharmony_ci aesdec KEY, STATE3 224862306a36Sopenharmony_ci aesdec KEY, STATE4 224962306a36Sopenharmony_ci.align 4 225062306a36Sopenharmony_ci.L4dec192: 225162306a36Sopenharmony_ci movaps -0x40(TKEYP), KEY 225262306a36Sopenharmony_ci aesdec KEY, STATE1 225362306a36Sopenharmony_ci aesdec KEY, STATE2 225462306a36Sopenharmony_ci aesdec KEY, STATE3 225562306a36Sopenharmony_ci aesdec KEY, STATE4 225662306a36Sopenharmony_ci movaps -0x30(TKEYP), KEY 225762306a36Sopenharmony_ci aesdec KEY, STATE1 225862306a36Sopenharmony_ci aesdec KEY, STATE2 225962306a36Sopenharmony_ci aesdec KEY, STATE3 226062306a36Sopenharmony_ci aesdec KEY, STATE4 226162306a36Sopenharmony_ci.align 4 226262306a36Sopenharmony_ci.L4dec128: 226362306a36Sopenharmony_ci movaps -0x20(TKEYP), KEY 226462306a36Sopenharmony_ci aesdec KEY, STATE1 226562306a36Sopenharmony_ci aesdec KEY, STATE2 226662306a36Sopenharmony_ci aesdec KEY, STATE3 226762306a36Sopenharmony_ci aesdec KEY, STATE4 226862306a36Sopenharmony_ci movaps -0x10(TKEYP), KEY 226962306a36Sopenharmony_ci aesdec KEY, STATE1 227062306a36Sopenharmony_ci aesdec KEY, STATE2 227162306a36Sopenharmony_ci aesdec KEY, STATE3 227262306a36Sopenharmony_ci aesdec KEY, STATE4 227362306a36Sopenharmony_ci movaps (TKEYP), KEY 
227462306a36Sopenharmony_ci aesdec KEY, STATE1 227562306a36Sopenharmony_ci aesdec KEY, STATE2 227662306a36Sopenharmony_ci aesdec KEY, STATE3 227762306a36Sopenharmony_ci aesdec KEY, STATE4 227862306a36Sopenharmony_ci movaps 0x10(TKEYP), KEY 227962306a36Sopenharmony_ci aesdec KEY, STATE1 228062306a36Sopenharmony_ci aesdec KEY, STATE2 228162306a36Sopenharmony_ci aesdec KEY, STATE3 228262306a36Sopenharmony_ci aesdec KEY, STATE4 228362306a36Sopenharmony_ci movaps 0x20(TKEYP), KEY 228462306a36Sopenharmony_ci aesdec KEY, STATE1 228562306a36Sopenharmony_ci aesdec KEY, STATE2 228662306a36Sopenharmony_ci aesdec KEY, STATE3 228762306a36Sopenharmony_ci aesdec KEY, STATE4 228862306a36Sopenharmony_ci movaps 0x30(TKEYP), KEY 228962306a36Sopenharmony_ci aesdec KEY, STATE1 229062306a36Sopenharmony_ci aesdec KEY, STATE2 229162306a36Sopenharmony_ci aesdec KEY, STATE3 229262306a36Sopenharmony_ci aesdec KEY, STATE4 229362306a36Sopenharmony_ci movaps 0x40(TKEYP), KEY 229462306a36Sopenharmony_ci aesdec KEY, STATE1 229562306a36Sopenharmony_ci aesdec KEY, STATE2 229662306a36Sopenharmony_ci aesdec KEY, STATE3 229762306a36Sopenharmony_ci aesdec KEY, STATE4 229862306a36Sopenharmony_ci movaps 0x50(TKEYP), KEY 229962306a36Sopenharmony_ci aesdec KEY, STATE1 230062306a36Sopenharmony_ci aesdec KEY, STATE2 230162306a36Sopenharmony_ci aesdec KEY, STATE3 230262306a36Sopenharmony_ci aesdec KEY, STATE4 230362306a36Sopenharmony_ci movaps 0x60(TKEYP), KEY 230462306a36Sopenharmony_ci aesdec KEY, STATE1 230562306a36Sopenharmony_ci aesdec KEY, STATE2 230662306a36Sopenharmony_ci aesdec KEY, STATE3 230762306a36Sopenharmony_ci aesdec KEY, STATE4 230862306a36Sopenharmony_ci movaps 0x70(TKEYP), KEY 230962306a36Sopenharmony_ci aesdeclast KEY, STATE1 # last round 231062306a36Sopenharmony_ci aesdeclast KEY, STATE2 231162306a36Sopenharmony_ci aesdeclast KEY, STATE3 231262306a36Sopenharmony_ci aesdeclast KEY, STATE4 231362306a36Sopenharmony_ci RET 231462306a36Sopenharmony_ciSYM_FUNC_END(_aesni_dec4) 

/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts the whole 16-byte blocks of src into dst, four blocks per
 * iteration while at least 64 bytes remain and one block at a time after
 * that.  A trailing partial block (len % 16) is left untouched.
 * len is a byte count and is compared as an unsigned value throughout.
 */
SYM_FUNC_START(aesni_ecb_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN	# check length
	jz .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4	# unsigned, matching the jb guards on LEN
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1	# NOTE(review): _aesni_enc1 works on STATE; presumably the same register as STATE1 - confirm against the register defines
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1	# unsigned, matching the jb guards on LEN
.Lecb_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_enc)

/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts the whole 16-byte blocks of src into dst, using the
 * decryption schedule at ctx + 240.  Same loop structure as aesni_ecb_enc;
 * a trailing partial block is left untouched.
 */
SYM_FUNC_START(aesni_ecb_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN
	jz .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP	# switch to the decryption round keys
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4	# unsigned, matching the jb guards on LEN
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1	# NOTE(review): _aesni_dec1 works on STATE; presumably the same register as STATE1 - confirm against the register defines
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1	# unsigned, matching the jb guards on LEN
.Lecb_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_dec)

/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts the whole 16-byte blocks of src into dst, one block at a
 * time.  iv supplies the initial chaining value and receives the last
 * ciphertext block on return.  If len is below 16 nothing is processed and
 * iv is not written.
 */
SYM_FUNC_START(aesni_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE	# chain: plaintext ^ previous ciphertext (or iv)
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop	# unsigned, matching the jb guard on LEN
	movups STATE, (IVP)	# hand the last ciphertext block back as next iv
.Lcbc_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_enc)

/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts the whole 16-byte blocks of src into dst, four blocks per
 * iteration while at least 64 bytes remain and one block at a time after
 * that, using the decryption schedule at ctx + 240.  iv supplies the
 * initial chaining value and receives the last ciphertext block consumed.
 * If len is below 16 nothing is processed and iv is not written.
 */
SYM_FUNC_START(aesni_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP	# switch to the decryption round keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	/* keep the ciphertext blocks: each one is the chaining value of the next */
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
#ifdef __x86_64__
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
#else
	/* this path only uses IN1/IN2, so blocks 0 and 1 are re-read after the call */
	movups 0x20(INP), IN1
	movaps IN1, STATE3
	movups 0x30(INP), IN2
	movaps IN2, STATE4
#endif
	call _aesni_dec4
	pxor IV, STATE1
#ifdef __x86_64__
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV
#else
	pxor IN1, STATE4	# IN1 holds ciphertext block 2 here
	movaps IN2, IV	# IN2 holds ciphertext block 3 here
	movups (INP), IN1
	pxor IN1, STATE2
	movups 0x10(INP), IN2
	pxor IN2, STATE3
#endif
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4	# unsigned, matching the jb guards on LEN
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV	# this ciphertext block chains into the next one
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1	# unsigned, matching the jb guards on LEN
.Lcbc_dec_ret:
	movups IV, (IVP)
.Lcbc_dec_just_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_dec)

/*
257562306a36Sopenharmony_ci * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 257662306a36Sopenharmony_ci * size_t len, u8 *iv) 257762306a36Sopenharmony_ci */ 257862306a36Sopenharmony_ciSYM_FUNC_START(aesni_cts_cbc_enc) 257962306a36Sopenharmony_ci FRAME_BEGIN 258062306a36Sopenharmony_ci#ifndef __x86_64__ 258162306a36Sopenharmony_ci pushl IVP 258262306a36Sopenharmony_ci pushl LEN 258362306a36Sopenharmony_ci pushl KEYP 258462306a36Sopenharmony_ci pushl KLEN 258562306a36Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), KEYP # ctx 258662306a36Sopenharmony_ci movl (FRAME_OFFSET+24)(%esp), OUTP # dst 258762306a36Sopenharmony_ci movl (FRAME_OFFSET+28)(%esp), INP # src 258862306a36Sopenharmony_ci movl (FRAME_OFFSET+32)(%esp), LEN # len 258962306a36Sopenharmony_ci movl (FRAME_OFFSET+36)(%esp), IVP # iv 259062306a36Sopenharmony_ci lea .Lcts_permute_table, T1 259162306a36Sopenharmony_ci#else 259262306a36Sopenharmony_ci lea .Lcts_permute_table(%rip), T1 259362306a36Sopenharmony_ci#endif 259462306a36Sopenharmony_ci mov 480(KEYP), KLEN 259562306a36Sopenharmony_ci movups (IVP), STATE 259662306a36Sopenharmony_ci sub $16, LEN 259762306a36Sopenharmony_ci mov T1, IVP 259862306a36Sopenharmony_ci add $32, IVP 259962306a36Sopenharmony_ci add LEN, T1 260062306a36Sopenharmony_ci sub LEN, IVP 260162306a36Sopenharmony_ci movups (T1), %xmm4 260262306a36Sopenharmony_ci movups (IVP), %xmm5 260362306a36Sopenharmony_ci 260462306a36Sopenharmony_ci movups (INP), IN1 260562306a36Sopenharmony_ci add LEN, INP 260662306a36Sopenharmony_ci movups (INP), IN2 260762306a36Sopenharmony_ci 260862306a36Sopenharmony_ci pxor IN1, STATE 260962306a36Sopenharmony_ci call _aesni_enc1 261062306a36Sopenharmony_ci 261162306a36Sopenharmony_ci pshufb %xmm5, IN2 261262306a36Sopenharmony_ci pxor STATE, IN2 261362306a36Sopenharmony_ci pshufb %xmm4, STATE 261462306a36Sopenharmony_ci add OUTP, LEN 261562306a36Sopenharmony_ci movups STATE, (LEN) 261662306a36Sopenharmony_ci 261762306a36Sopenharmony_ci 
movaps IN2, STATE 261862306a36Sopenharmony_ci call _aesni_enc1 261962306a36Sopenharmony_ci movups STATE, (OUTP) 262062306a36Sopenharmony_ci 262162306a36Sopenharmony_ci#ifndef __x86_64__ 262262306a36Sopenharmony_ci popl KLEN 262362306a36Sopenharmony_ci popl KEYP 262462306a36Sopenharmony_ci popl LEN 262562306a36Sopenharmony_ci popl IVP 262662306a36Sopenharmony_ci#endif 262762306a36Sopenharmony_ci FRAME_END 262862306a36Sopenharmony_ci RET 262962306a36Sopenharmony_ciSYM_FUNC_END(aesni_cts_cbc_enc) 263062306a36Sopenharmony_ci 263162306a36Sopenharmony_ci/* 263262306a36Sopenharmony_ci * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 263362306a36Sopenharmony_ci * size_t len, u8 *iv) 263462306a36Sopenharmony_ci */ 263562306a36Sopenharmony_ciSYM_FUNC_START(aesni_cts_cbc_dec) 263662306a36Sopenharmony_ci FRAME_BEGIN 263762306a36Sopenharmony_ci#ifndef __x86_64__ 263862306a36Sopenharmony_ci pushl IVP 263962306a36Sopenharmony_ci pushl LEN 264062306a36Sopenharmony_ci pushl KEYP 264162306a36Sopenharmony_ci pushl KLEN 264262306a36Sopenharmony_ci movl (FRAME_OFFSET+20)(%esp), KEYP # ctx 264362306a36Sopenharmony_ci movl (FRAME_OFFSET+24)(%esp), OUTP # dst 264462306a36Sopenharmony_ci movl (FRAME_OFFSET+28)(%esp), INP # src 264562306a36Sopenharmony_ci movl (FRAME_OFFSET+32)(%esp), LEN # len 264662306a36Sopenharmony_ci movl (FRAME_OFFSET+36)(%esp), IVP # iv 264762306a36Sopenharmony_ci lea .Lcts_permute_table, T1 264862306a36Sopenharmony_ci#else 264962306a36Sopenharmony_ci lea .Lcts_permute_table(%rip), T1 265062306a36Sopenharmony_ci#endif 265162306a36Sopenharmony_ci mov 480(KEYP), KLEN 265262306a36Sopenharmony_ci add $240, KEYP 265362306a36Sopenharmony_ci movups (IVP), IV 265462306a36Sopenharmony_ci sub $16, LEN 265562306a36Sopenharmony_ci mov T1, IVP 265662306a36Sopenharmony_ci add $32, IVP 265762306a36Sopenharmony_ci add LEN, T1 265862306a36Sopenharmony_ci sub LEN, IVP 265962306a36Sopenharmony_ci movups (T1), %xmm4 266062306a36Sopenharmony_ci 
266162306a36Sopenharmony_ci movups (INP), STATE 266262306a36Sopenharmony_ci add LEN, INP 266362306a36Sopenharmony_ci movups (INP), IN1 266462306a36Sopenharmony_ci 266562306a36Sopenharmony_ci call _aesni_dec1 266662306a36Sopenharmony_ci movaps STATE, IN2 266762306a36Sopenharmony_ci pshufb %xmm4, STATE 266862306a36Sopenharmony_ci pxor IN1, STATE 266962306a36Sopenharmony_ci 267062306a36Sopenharmony_ci add OUTP, LEN 267162306a36Sopenharmony_ci movups STATE, (LEN) 267262306a36Sopenharmony_ci 267362306a36Sopenharmony_ci movups (IVP), %xmm0 267462306a36Sopenharmony_ci pshufb %xmm0, IN1 267562306a36Sopenharmony_ci pblendvb IN2, IN1 267662306a36Sopenharmony_ci movaps IN1, STATE 267762306a36Sopenharmony_ci call _aesni_dec1 267862306a36Sopenharmony_ci 267962306a36Sopenharmony_ci pxor IV, STATE 268062306a36Sopenharmony_ci movups STATE, (OUTP) 268162306a36Sopenharmony_ci 268262306a36Sopenharmony_ci#ifndef __x86_64__ 268362306a36Sopenharmony_ci popl KLEN 268462306a36Sopenharmony_ci popl KEYP 268562306a36Sopenharmony_ci popl LEN 268662306a36Sopenharmony_ci popl IVP 268762306a36Sopenharmony_ci#endif 268862306a36Sopenharmony_ci FRAME_END 268962306a36Sopenharmony_ci RET 269062306a36Sopenharmony_ciSYM_FUNC_END(aesni_cts_cbc_dec) 269162306a36Sopenharmony_ci 269262306a36Sopenharmony_ci.pushsection .rodata 269362306a36Sopenharmony_ci.align 16 269462306a36Sopenharmony_ci.Lcts_permute_table: 269562306a36Sopenharmony_ci .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 269662306a36Sopenharmony_ci .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 269762306a36Sopenharmony_ci .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 269862306a36Sopenharmony_ci .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f 269962306a36Sopenharmony_ci .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 270062306a36Sopenharmony_ci .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 270162306a36Sopenharmony_ci#ifdef __x86_64__ 270262306a36Sopenharmony_ci.Lbswap_mask: 270362306a36Sopenharmony_ci .byte 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 270462306a36Sopenharmony_ci#endif 270562306a36Sopenharmony_ci.popsection 270662306a36Sopenharmony_ci 270762306a36Sopenharmony_ci#ifdef __x86_64__ 270862306a36Sopenharmony_ci/* 270962306a36Sopenharmony_ci * _aesni_inc_init: internal ABI 271062306a36Sopenharmony_ci * setup registers used by _aesni_inc 271162306a36Sopenharmony_ci * input: 271262306a36Sopenharmony_ci * IV 271362306a36Sopenharmony_ci * output: 271462306a36Sopenharmony_ci * CTR: == IV, in little endian 271562306a36Sopenharmony_ci * TCTR_LOW: == lower qword of CTR 271662306a36Sopenharmony_ci * INC: == 1, in little endian 271762306a36Sopenharmony_ci * BSWAP_MASK == endian swapping mask 271862306a36Sopenharmony_ci */ 271962306a36Sopenharmony_ciSYM_FUNC_START_LOCAL(_aesni_inc_init) 272062306a36Sopenharmony_ci movaps .Lbswap_mask(%rip), BSWAP_MASK 272162306a36Sopenharmony_ci movaps IV, CTR 272262306a36Sopenharmony_ci pshufb BSWAP_MASK, CTR 272362306a36Sopenharmony_ci mov $1, TCTR_LOW 272462306a36Sopenharmony_ci movq TCTR_LOW, INC 272562306a36Sopenharmony_ci movq CTR, TCTR_LOW 272662306a36Sopenharmony_ci RET 272762306a36Sopenharmony_ciSYM_FUNC_END(_aesni_inc_init) 272862306a36Sopenharmony_ci 272962306a36Sopenharmony_ci/* 273062306a36Sopenharmony_ci * _aesni_inc: internal ABI 273162306a36Sopenharmony_ci * Increase IV by 1, IV is in big endian 273262306a36Sopenharmony_ci * input: 273362306a36Sopenharmony_ci * IV 273462306a36Sopenharmony_ci * CTR: == IV, in little endian 273562306a36Sopenharmony_ci * TCTR_LOW: == lower qword of CTR 273662306a36Sopenharmony_ci * INC: == 1, in little endian 273762306a36Sopenharmony_ci * BSWAP_MASK == endian swapping mask 273862306a36Sopenharmony_ci * output: 273962306a36Sopenharmony_ci * IV: Increase by 1 274062306a36Sopenharmony_ci * changed: 274162306a36Sopenharmony_ci * CTR: == output IV, in little endian 274262306a36Sopenharmony_ci * TCTR_LOW: == lower qword of CTR 274362306a36Sopenharmony_ci */ 
SYM_FUNC_START_LOCAL(_aesni_inc)
	paddq INC, CTR
	add $1, TCTR_LOW
	jnc .Linc_low
	/* the low qword wrapped: carry one into the high qword of CTR */
	pslldq $8, INC
	paddq INC, CTR
	psrldq $8, INC
.Linc_low:
	movaps CTR, IV
	pshufb BSWAP_MASK, IV
	RET
SYM_FUNC_END(_aesni_inc)

/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CTR mode over whole 16-byte blocks (this routine is only built for
 * x86_64).  Each counter value is encrypted and XORed with the
 * corresponding source block; the counter is advanced with _aesni_inc.
 * Four blocks are handled per iteration while at least 64 bytes remain,
 * then one block at a time.
 *
 * If len < 16 the function returns without touching dst or *iv.  Otherwise
 * the next unused counter value is stored to *iv.  A trailing remainder of
 * fewer than 16 bytes is ignored.
 */
SYM_FUNC_START(aesni_ctr_enc)
	FRAME_BEGIN
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN	/* presumably ctx->key_length - confirm offset */
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4	/* len is unsigned: pair with the jb tests */
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1	/* len is unsigned: pair with the jb tests */
.Lctr_enc_ret:
	movups IV, (IVP)
.Lctr_enc_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(aesni_ctr_enc)

#endif

.section	.rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
	.octa 0x00000000000000010000000000000087
.previous

/*
 * _aesni_gf128mul_x_ble:	internal ABI
 *	Multiply in GF(2^128) for XTS IVs
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	KEY:	== temporary value
 */
#define _aesni_gf128mul_x_ble() \
	pshufd $0x13, IV, KEY; \
	paddq IV, IV; \
	psrad $31, KEY; \
	pand GF128MUL_MASK, KEY; \
	pxor KEY, IV;

/*
 * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
 *			  const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS encryption of len bytes from src to dst, with *iv as the current
 * tweak.  Blocks are processed four at a time (the four tweaks are parked in
 * dst while _aesni_enc4 runs and XORed back in afterwards), then one at a
 * time.  When len is a multiple of 16 the next tweak is stored to *iv.
 * Otherwise the trailing partial block is handled by ciphertext stealing
 * and *iv is not updated.
 *
 * LEN is deliberately tested with signed jumps (jl): it is driven negative
 * to detect the tail.
 */
SYM_FUNC_START(aesni_xts_encrypt)
	FRAME_BEGIN
#ifndef __x86_64__
	/* save the registers restored before RET, then load the stack arguments */
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN	/* presumably ctx->key_length - confirm offset */

.Lxts_enc_loop4:
	sub $64, LEN
	jl .Lxts_enc_1x		/* fewer than 64 bytes left */

	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)	/* park tweak 0 in the output buffer */

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

	call _aesni_enc4

	/* fetch each parked tweak, XOR it in, and overwrite it with the result */
	movdqu 0x00(OUTP), IN
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble()

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_enc_loop4

.Lxts_enc_ret_iv:
	movups IV, (IVP)

.Lxts_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET

.Lxts_enc_1x:
	add $64, LEN		/* undo the speculative subtraction */
	jz .Lxts_enc_ret_iv
	sub $16, LEN
	jl .Lxts_enc_cts4	/* only a partial block is left */

.Lxts_enc_loop1:
	movdqu (INP), STATE
	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE
	_aesni_gf128mul_x_ble()

	test LEN, LEN
	jz .Lxts_enc_out

	add $16, INP
	sub $16, LEN
	jl .Lxts_enc_cts1	/* next block is partial: steal from this one */

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_enc_loop1

.Lxts_enc_out:
	movdqu STATE, (OUTP)
	jmp .Lxts_enc_ret_iv

.Lxts_enc_cts4:
	/* steal from the last block produced by the four-block loop */
	movdqa STATE4, STATE
	sub $16, OUTP

.Lxts_enc_cts1:
#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP		/* rewind input pointer */
	add $16, LEN		/* # bytes in final block */
	movups (INP), IN1

	/* IVP is reused as a table pointer, so *iv cannot be updated on this path */
	mov T1, IVP
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)

	movups (IVP), %xmm0	/* pblendvb takes its mask implicitly from %xmm0 */
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE

	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE

	movups STATE, (OUTP)
	jmp .Lxts_enc_ret
SYM_FUNC_END(aesni_xts_encrypt)

/*
 * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
 *			  const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS decryption of len bytes from src to dst, with *iv as the current
 * tweak.  Same structure as aesni_xts_encrypt.  When len is not a multiple
 * of 16, one full block is held back from the main loops so that the last
 * full block and the partial block are handled together by the
 * ciphertext-stealing path, which applies the last two tweaks in swapped
 * order.  *iv is only updated when len is a multiple of 16.
 *
 * LEN is deliberately tested with signed jumps (jl): it is driven negative
 * to detect the tail.
 */
SYM_FUNC_START(aesni_xts_decrypt)
	FRAME_BEGIN
#ifndef __x86_64__
	/* save the registers restored before RET, then load the stack arguments */
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN	/* presumably ctx->key_length - confirm offset */
	add $240, KEYP		/* presumably the decryption key schedule - confirm */

	test $15, LEN
	jz .Lxts_dec_loop4
	sub $16, LEN		/* partial tail: hold one full block back for CTS */

.Lxts_dec_loop4:
	sub $64, LEN
	jl .Lxts_dec_1x		/* fewer than 64 bytes left */

	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)	/* park tweak 0 in the output buffer */

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

	call _aesni_dec4

	/* fetch each parked tweak, XOR it in, and overwrite it with the result */
	movdqu 0x00(OUTP), IN
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble()

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_dec_loop4

.Lxts_dec_ret_iv:
	movups IV, (IVP)

.Lxts_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET

.Lxts_dec_1x:
	add $64, LEN		/* undo the speculative subtraction */
	jz .Lxts_dec_ret_iv

.Lxts_dec_loop1:
	movdqu (INP), STATE

	add $16, INP
	sub $16, LEN
	jl .Lxts_dec_cts1	/* this is the held-back block: go steal */

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
	_aesni_gf128mul_x_ble()

	test LEN, LEN
	jz .Lxts_dec_out

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_dec_loop1

.Lxts_dec_out:
	movdqu STATE, (OUTP)
	jmp .Lxts_dec_ret_iv

.Lxts_dec_cts1:
	/* keep the current tweak for the final pass; use the next one first */
	movdqa IV, STATE4
	_aesni_gf128mul_x_ble()

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE

#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP		/* rewind input pointer */
	add $16, LEN		/* # bytes in final block */
	movups (INP), IN1

	/* IVP is reused as a table pointer, so *iv cannot be updated on this path */
	mov T1, IVP
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)

	movups (IVP), %xmm0	/* pblendvb takes its mask implicitly from %xmm0 */
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE

	pxor STATE4, STATE	/* STATE4 holds the tweak saved above */
	call _aesni_dec1
	pxor STATE4, STATE

	movups STATE, (OUTP)
	jmp .Lxts_dec_ret
SYM_FUNC_END(aesni_xts_decrypt)