/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE2 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 */

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 *
 * The exported functions take their arguments in %rdi, %rsi, %rdx, %rcx
 * (see the STATEP/LEN/SRC/DST aliases below).  The 'length' argument is
 * declared as 'unsigned int' in the C prototypes, so it is only ever
 * accessed through the 32-bit register %esi: the upper half of %rsi is
 * not guaranteed to be zero for a 32-bit argument.
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>

/* The five 128-bit words of the AEGIS-128 state. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
/* KEY and MSG deliberately share %xmm5; they are never live together. */
#define KEY	%xmm5
#define MSG	%xmm5
/* Scratch registers. */
#define T0	%xmm6
#define T1	%xmm7

#define STATEP	%rdi
#define LEN	%esi	/* 32-bit: 'unsigned int length' */
#define SRC	%rdx
#define DST	%rcx

.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/* Byte indices 0..15, compared against the byte count in dec_tail. */
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
.align 16
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f

.text

/*
 * aegis128_update
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state (shifted positions)
 * changed:
 *   T0
 *
 * The macro always operates on the fixed registers STATE0..STATE4, so
 * each invocation rotates the mapping by one position: reading the
 * registers as S0..S4 on entry, the new S0 ends up in STATE4, the new
 * S1 in STATE0, the new S2 in STATE1, the new S3 in STATE2 and the new
 * S4 in STATE3.  The message word is not mixed in here; callers xor it
 * into the register that holds the new S0 right after the macro.
 */
.macro aegis128_update
	movdqa STATE4, T0	/* keep the old STATE4 for the last round */
	aesenc STATE0, STATE4
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc T0,     STATE3
.endm

/*
 * __load_partial: internal ABI
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *
 * Only bits 0-3 of LEN select what is loaded.  The block is assembled
 * from the tail inwards (1, 2, 4, then 8 byte pieces); each piece is
 * read at the offset given by the higher LEN bits, and the data
 * gathered so far is shifted up to make room.  Bytes of MSG that are
 * not loaded are zero.
 */
SYM_FUNC_START_LOCAL(__load_partial)
	xor %r9d, %r9d
	pxor MSG, MSG

	/* trailing single byte */
	mov LEN, %r8d
	and $0x1, %r8
	jz .Lld_partial_1

	mov LEN, %r8d
	and $0x1E, %r8
	add SRC, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* 2-byte piece */
	mov LEN, %r8d
	and $0x2, %r8
	jz .Lld_partial_2

	mov LEN, %r8d
	and $0x1C, %r8
	add SRC, %r8
	shl $0x10, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* 4-byte piece */
	mov LEN, %r8d
	and $0x4, %r8
	jz .Lld_partial_4

	mov LEN, %r8d
	and $0x18, %r8
	add SRC, %r8
	shl $32, %r9
	mov (%r8), %r8d
	xor %r8, %r9

.Lld_partial_4:
	movq %r9, MSG

	/* 8-byte piece */
	mov LEN, %r8d
	and $0x8, %r8
	jz .Lld_partial_8

	mov LEN, %r8d
	and $0x10, %r8
	add SRC, %r8
	pslldq $8, MSG
	movq (%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	RET
SYM_FUNC_END(__load_partial)

/*
 * __store_partial: internal ABI
 * input:
 *   LEN - bytes
 *   DST - dst
 *   T0  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *   %r10
 *
 * Writes the low bytes of T0 to DST in 8, 4, 2 and 1 byte pieces,
 * each piece only while at least that many bytes remain.
 */
SYM_FUNC_START_LOCAL(__store_partial)
	mov LEN, %r8d		/* %r8 = bytes left (zero-extended) */
	mov DST, %r9		/* %r9 = write cursor */

	movq T0, %r10

	cmp $8, %r8
	jl .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0		/* continue with the high half of the block */
	movq T0, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jl .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jl .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jl .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	RET
SYM_FUNC_END(__store_partial)

/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * The key is read with an aligned load (movdqa), the IV with an
 * unaligned one.  The resulting 80-byte state is written to 'state'.
 */
SYM_FUNC_START(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), T1

	/* load key: */
	movdqa (%rsi), KEY
	pxor KEY, T1		/* T1 = KEY xor IV */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/* load the constants: */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/* update 10 times with KEY / KEY xor IV: */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state (10 updates leave the registers unrotated): */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_init)

/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs whole 16-byte blocks of associated data into the state.
 * Returns without touching the state when length < 16; a trailing
 * partial block (length % 16 bytes) is not consumed.
 *
 * The loop is unrolled five times so that the register rotation done
 * by aegis128_update returns to its starting position on every
 * iteration.  Each .Lad_out_N exit stores the registers in the order
 * that matches the rotation at that point.
 */
SYM_FUNC_START(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* pick the aligned-load loop only if SRC is 16-byte aligned */
	mov SRC, %r8
	and $0xF, %r8
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	movdqa 0x00(SRC), MSG
	aegis128_update
	pxor MSG, STATE4
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_1		/* unsigned: LEN is a byte count */

	movdqa 0x10(SRC), MSG
	aegis128_update
	pxor MSG, STATE3
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_2

	movdqa 0x20(SRC), MSG
	aegis128_update
	pxor MSG, STATE2
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_3

	movdqa 0x30(SRC), MSG
	aegis128_update
	pxor MSG, STATE1
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_4

	movdqa 0x40(SRC), MSG
	aegis128_update
	pxor MSG, STATE0
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_0

	add $0x50, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	movdqu 0x00(SRC), MSG
	aegis128_update
	pxor MSG, STATE4
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_1

	movdqu 0x10(SRC), MSG
	aegis128_update
	pxor MSG, STATE3
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_2

	movdqu 0x20(SRC), MSG
	aegis128_update
	pxor MSG, STATE2
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_3

	movdqu 0x30(SRC), MSG
	aegis128_update
	pxor MSG, STATE1
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_4

	movdqu 0x40(SRC), MSG
	aegis128_update
	pxor MSG, STATE0
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_0

	add $0x50, SRC
	jmp .Lad_u_loop

	/* store the state: */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)

/*
 * encrypt_block: encrypt one 16-byte block and absorb the plaintext.
 *   a       - 'a' or 'u': aligned or unaligned load/store (movdqa/movdqu)
 *   s0..s4  - registers currently holding state words 0..4
 *   i       - block index within the unrolled loop (0..4); selects the
 *             SRC/DST offset and the .Lenc_out_<i> exit label
 * changed:
 *   MSG, T0, T1, LEN, the state registers
 *
 * Ciphertext = plaintext xor s1 xor s4 xor (s2 and s3).  After the
 * state update the plaintext is xored into \s4, the register that then
 * holds the new state word 0.  Leaves through .Lenc_out_<i> when fewer
 * than 16 bytes remain.
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	pxor \s1, T0
	pxor \s4, T0
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, T0
	movdq\a T0, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lenc_out_\i		/* unsigned: LEN is a byte count */
.endm

/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts whole 16-byte blocks from src to dst and updates the state.
 * Returns without doing anything when length < 16; a trailing partial
 * block is not processed here.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lenc_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* aligned loop only if both SRC and DST are 16-byte aligned */
	mov  SRC,  %r8
	or   DST,  %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_u_loop

	/* store the state: */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)

/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts one final partial block.  __load_partial and __store_partial
 * handle at most 15 bytes, selected by the low four bits of length.
 * The zero-padded plaintext block is what gets absorbed into the state.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* encrypt message: */
	call __load_partial

	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	call __store_partial

	aegis128_update
	pxor MSG, STATE4

	/* store the state (rotated by one update): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)

/*
 * decrypt_block: decrypt one 16-byte block and absorb the plaintext.
 *   a       - 'a' or 'u': aligned or unaligned load/store (movdqa/movdqu)
 *   s0..s4  - registers currently holding state words 0..4
 *   i       - block index within the unrolled loop (0..4); selects the
 *             SRC/DST offset and the .Ldec_out_<i> exit label
 * changed:
 *   MSG, T1, LEN, the state registers (T0 via aegis128_update)
 *
 * Plaintext = ciphertext xor s1 xor s4 xor (s2 and s3); it is computed
 * in place in MSG, stored, and then xored into \s4 after the update.
 * Leaves through .Ldec_out_<i> when fewer than 16 bytes remain.
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	pxor \s1, MSG
	pxor \s4, MSG
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, MSG
	movdq\a MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Ldec_out_\i		/* unsigned: LEN is a byte count */
.endm

/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts whole 16-byte blocks from src to dst and updates the state.
 * Returns without doing anything when length < 16; a trailing partial
 * block is not processed here.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Ldec_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* aligned loop only if both SRC and DST are 16-byte aligned */
	mov  SRC, %r8
	or   DST, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_u_loop

	/* store the state: */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)

/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts one final partial block.  After the keystream xor, the
 * bytes of MSG at positions >= length are not plaintext, so they are
 * masked to zero before the block is absorbed into the state.
 */
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* decrypt message: */
	call __load_partial

	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	movdqa MSG, T0
	call __store_partial

	/* mask with byte count: */
	movd LEN, T0
	/* four unpacks replicate the low byte of LEN into all 16 bytes */
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Laegis128_counter(%rip), T1
	pcmpgtb T1, T0		/* byte i = 0xff where LEN > i, else 0 */
	pand T0, MSG

	aegis128_update
	pxor MSG, STATE4

	/* store the state (rotated by one update): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)

/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Runs the finalization rounds and xors the resulting tag into the 16
 * bytes at tag_xor.  The state in memory is not written back.
 * assoclen and cryptlen are full 64-bit values, read from %rdx and
 * %rcx; tag_xor is the pointer in %rsi.
 */
SYM_FUNC_START(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* prepare length block: */
	movq %rdx, MSG		/* low qword  = assoclen */
	movq %rcx, T0
	pslldq $8, T0		/* high qword = cryptlen */
	pxor T0, MSG
	psllq $3, MSG /* multiply by 8 (to get bit count) */

	pxor STATE3, MSG

	/* update state: */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* xor tag: */
	movdqu (%rsi), MSG

	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_final)