18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * AES-NI + SSE2 implementation of AEGIS-128
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
68c2ecf20Sopenharmony_ci * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/linkage.h>
108c2ecf20Sopenharmony_ci#include <asm/frame.h>
118c2ecf20Sopenharmony_ci
/* Integer argument registers as used by the functions in this file. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx

/* The five 128-bit AEGIS-128 state words. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/* KEY and MSG are two names for the same register (%xmm5). */
#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch registers. */
#define T0	%xmm6
#define T1	%xmm7
268c2ecf20Sopenharmony_ci
/*
 * Initialization constants: 32 consecutive terms of the Fibonacci sequence
 * reduced modulo 256 (each byte is the sum of the two preceding bytes).
 * const_0 holds the first 16 bytes, const_1 the following 16.
 */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.balign 16
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15; compared against a broadcast byte count in
 * crypto_aegis128_aesni_dec_tail to build a "first LEN bytes" mask.
 */
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
.balign 16
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci.text
438c2ecf20Sopenharmony_ci
/*
 * aegis128_update - one state update, without the message xor
 *
 * in:      STATE[0-4] - current state
 * out:     STATE[0-4] - next state, rotated by one register: the word the
 *                       caller must xor the message block into ends up in
 *                       STATE4, the following word in STATE0, and so on.
 * clobber: T0
 *
 * The instruction order is significant: each aesenc consumes the
 * not-yet-updated value of the next register as its round key.
 */
.macro aegis128_update
	movdqa STATE4, T0	/* keep the old STATE4 for the last step */
	aesenc STATE0, STATE4	/* STATE4 = AESRound(STATE4) ^ STATE0 */
	aesenc STATE1, STATE0	/* STATE0 = AESRound(STATE0) ^ STATE1 */
	aesenc STATE2, STATE1	/* STATE1 = AESRound(STATE1) ^ STATE2 */
	aesenc STATE3, STATE2	/* STATE2 = AESRound(STATE2) ^ STATE3 */
	aesenc T0,     STATE3	/* STATE3 = AESRound(STATE3) ^ old STATE4 */
.endm
618c2ecf20Sopenharmony_ci
/*
 * __load_partial: internal ABI
 *
 * Collect (LEN & 0xF) bytes, starting at SRC + (LEN & 0x10), into MSG.
 * Bytes beyond the loaded ones are zero.  The pieces are picked up from
 * the end of the data towards its start (1, 2, 4, then 8 bytes), each
 * one selected by the matching bit of LEN.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
SYM_FUNC_START_LOCAL(__load_partial)
	xor %r9d, %r9d
	pxor MSG, MSG

	/* bit 0: one byte at offset (LEN & 0x1E) */
	test $0x1, LEN
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8
	add SRC, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* bit 1: two bytes at offset (LEN & 0x1C) */
	test $0x2, LEN
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	add SRC, %r8
	shl $0x10, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* bit 2: four bytes at offset (LEN & 0x18) */
	test $0x4, LEN
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	add SRC, %r8
	shl $32, %r9
	mov (%r8), %r8d		/* 32-bit load clears the upper half of %r8 */
	xor %r8, %r9

.Lld_partial_4:
	/* up to seven bytes are now in %r9; move them into the low half of MSG */
	movq %r9, MSG

	/* bit 3: eight bytes at offset (LEN & 0x10) */
	test $0x8, LEN
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	add SRC, %r8
	pslldq $8, MSG		/* shift what we have into the high half */
	movq (%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	RET
SYM_FUNC_END(__load_partial)
1278c2ecf20Sopenharmony_ci
/*
 * __store_partial: internal ABI
 *
 * Write the leading bytes of T0 to DST in chunks of 8, 4, 2 and 1 bytes,
 * each chunk being written only while at least that many bytes remain.
 *
 * input:
 *   LEN - bytes (only the low 32 bits are used)
 *   DST - dst
 *   T0  - message block to store
 * changed:
 *   T0  (shifted right by 8 bytes when LEN >= 8)
 *   %r8
 *   %r9
 *   %r10
 */
SYM_FUNC_START_LOCAL(__store_partial)
	/*
	 * The public entry points receive the length as 'unsigned int', so
	 * the upper half of the 64-bit register is unspecified.  Writing a
	 * 32-bit register zero-extends, giving a clean 64-bit byte count.
	 */
	mov LEN, %r8
	mov %r8d, %r8d
	mov DST, %r9

	movq T0, %r10		/* low 8 bytes of the block */

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0
	movq T0, %r10		/* continue with the high 8 bytes */

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	RET
SYM_FUNC_END(__store_partial)
1858c2ecf20Sopenharmony_ci
/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * Build the initial AEGIS-128 state from a 16-byte key and a 16-byte IV
 * and write the five state words (0x50 bytes) to 'state'.
 *
 *   %rdi (STATEP) - state, written
 *   %rsi          - key, read
 *   %rdx          - iv, read
 */
SYM_FUNC_START(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), T1

	/*
	 * load key: use an unaligned load, since nothing in this file
	 * guarantees that the 'const void *key' argument is 16-byte aligned
	 * and movdqa would fault if it is not.
	 */
	movdqu (%rsi), KEY
	pxor KEY, T1		/* T1 = KEY ^ IV */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/*
	 * load the constants (RIP-relative, so no absolute relocation is
	 * needed; both are 16-byte aligned in .rodata):
	 */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/* update 10 times with KEY / KEY xor IV: */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state (ten updates = two full rotations, so no shift): */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_init)
2308c2ecf20Sopenharmony_ci
/*
 * ad_block - absorb one 16-byte block of associated data
 *
 *   a   - 'a' or 'u': load with movdqa (aligned) or movdqu (unaligned)
 *   i   - index of the block within the current 0x50-byte group
 *   s   - state register the message block is xor-ed into after the update
 *   out - suffix of the .Lad_out_* label taken when fewer than 16 bytes
 *         remain after this block
 *
 * changed: MSG, T0, LEN, flags
 */
.macro ad_block a i s out
	movdq\a (\i * 0x10)(SRC), MSG
	aegis128_update
	pxor MSG, \s
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_\out
.endm

/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorb every complete 16-byte block of 'data' into the state.  If
 * length < 16 the state is left untouched; a trailing partial block is
 * not processed here.
 *
 *   %rdi (STATEP) - state, read and written
 *   %esi (LEN)    - length in bytes
 *   %rdx (SRC)    - data
 */
SYM_FUNC_START(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi (LEN) is
	 * unspecified on entry.  Writing %esi zero-extends into %rsi and
	 * makes the 64-bit arithmetic on LEN below well defined.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* pick the aligned loop only when SRC is 16-byte aligned */
	test $0xF, SRC
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a 0 STATE4 1
	ad_block a 1 STATE3 2
	ad_block a 2 STATE2 3
	ad_block a 3 STATE1 4
	ad_block a 4 STATE0 0

	add $0x50, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u 0 STATE4 1
	ad_block u 1 STATE3 2
	ad_block u 2 STATE2 3
	ad_block u 3 STATE1 4
	ad_block u 4 STATE0 0

	add $0x50, SRC
	jmp .Lad_u_loop

	/*
	 * store the state: each update rotates the state by one register,
	 * so every exit writes the registers back in the matching order.
	 */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lad_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)
3828c2ecf20Sopenharmony_ci
/*
 * encrypt_block - encrypt one 16-byte block and absorb the plaintext
 *
 *   a      - 'a' or 'u': movdqa (aligned) or movdqu (unaligned) accesses
 *   s0..s4 - the state registers in their current rotation (s0 is accepted
 *            for symmetry but not referenced)
 *   i      - block index within the 0x50-byte group; also selects the
 *            .Lenc_out_<i> exit taken when fewer than 16 bytes remain
 *
 * changed: MSG, T0, T1, LEN, flags
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa \s2, T1
	pand \s3, T1

	/* ciphertext = plaintext ^ (s2 & s3) ^ s1 ^ s4 */
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	pxor T1, T0
	pxor \s1, T0
	pxor \s4, T0
	movdq\a T0, (\i * 0x10)(DST)

	/* absorb the plaintext block */
	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Lenc_out_\i
.endm
4008c2ecf20Sopenharmony_ci
/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypt every complete 16-byte block from 'src' to 'dst' and update the
 * state.  If length < 16 nothing is read or written; a trailing partial
 * block is not processed here.
 *
 *   %rdi (STATEP) - state, read and written
 *   %esi (LEN)    - length in bytes
 *   %rdx (SRC)    - src
 *   %rcx (DST)    - dst
 */
SYM_FUNC_START(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi (LEN) is
	 * unspecified on entry.  Writing %esi zero-extends into %rsi and
	 * makes the 64-bit arithmetic on LEN below well defined.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lenc_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the aligned loop needs both SRC and DST 16-byte aligned */
	mov  SRC,  %r8
	or   DST,  %r8
	test $0xF, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_u_loop

	/*
	 * store the state: .Lenc_out_<i> is reached after block i of a
	 * group, i.e. after i + 1 rotations, and writes the registers back
	 * in the matching order.
	 */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)
4978c2ecf20Sopenharmony_ci
/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypt a final partial block: the bytes are gathered by __load_partial
 * (zero-padded to 16 bytes), encrypted, written out by __store_partial,
 * and the zero-padded plaintext is absorbed into the state.
 *
 *   %rdi (STATEP) - state, read and written
 *   %esi (LEN)    - length in bytes
 *   %rdx (SRC)    - src
 *   %rcx (DST)    - dst
 */
SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi (LEN) is
	 * unspecified on entry.  Zero-extend it before the helpers below
	 * look at LEN as a 64-bit quantity.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* encrypt message: */
	call __load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	call __store_partial

	/* absorb the zero-padded plaintext */
	aegis128_update
	pxor MSG, STATE4

	/* store the state (one update = one rotation): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
5378c2ecf20Sopenharmony_ci
/*
 * decrypt_block - decrypt one 16-byte block and absorb the plaintext
 *
 *   a      - 'a' or 'u': movdqa (aligned) or movdqu (unaligned) accesses
 *   s0..s4 - the state registers in their current rotation (s0 is accepted
 *            for symmetry but not referenced)
 *   i      - block index within the 0x50-byte group; also selects the
 *            .Ldec_out_<i> exit taken when fewer than 16 bytes remain
 *
 * changed: MSG, T0, T1, LEN, flags
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa \s2, T1
	pand \s3, T1

	/* plaintext = ciphertext ^ (s2 & s3) ^ s1 ^ s4 */
	movdq\a (\i * 0x10)(SRC), MSG
	pxor T1, MSG
	pxor \s1, MSG
	pxor \s4, MSG
	movdq\a MSG, (\i * 0x10)(DST)

	/* absorb the recovered plaintext block */
	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Ldec_out_\i
.endm
5548c2ecf20Sopenharmony_ci
/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypt every complete 16-byte block from 'src' to 'dst' and update the
 * state.  If length < 16 nothing is read or written; a trailing partial
 * block is not processed here.
 *
 *   %rdi (STATEP) - state, read and written
 *   %esi (LEN)    - length in bytes
 *   %rdx (SRC)    - src
 *   %rcx (DST)    - dst
 */
SYM_FUNC_START(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi (LEN) is
	 * unspecified on entry.  Writing %esi zero-extends into %rsi and
	 * makes the 64-bit arithmetic on LEN below well defined.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Ldec_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the aligned loop needs both SRC and DST 16-byte aligned */
	mov  SRC, %r8
	or   DST, %r8
	test $0xF, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_u_loop

	/*
	 * store the state: .Ldec_out_<i> is reached after block i of a
	 * group, i.e. after i + 1 rotations, and writes the registers back
	 * in the matching order.
	 */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out:
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)
6518c2ecf20Sopenharmony_ci
/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypt a final partial block: the bytes are gathered by __load_partial,
 * decrypted and written out by __store_partial.  The decrypted block is
 * then masked down to its first LEN bytes before being absorbed into the
 * state, because the padding bytes of the ciphertext decrypt to keystream
 * rather than to zero.
 *
 *   %rdi (STATEP) - state, read and written
 *   %esi (LEN)    - length in bytes
 *   %rdx (SRC)    - src
 *   %rcx (DST)    - dst
 */
SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of %rsi (LEN) is
	 * unspecified on entry.  Zero-extend it before the helpers below
	 * look at LEN as a 64-bit quantity.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* decrypt message: */
	call __load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	movdqa MSG, T0
	call __store_partial

	/*
	 * mask with byte count: broadcast the low byte of LEN to all 16
	 * bytes of T0, then set byte j of the mask to 0xff iff LEN > j
	 * (signed byte compare against the indices 0..15).
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Laegis128_counter(%rip), T1
	pcmpgtb T1, T0
	pand T0, MSG

	/* absorb the masked plaintext */
	aegis128_update
	pxor MSG, STATE4

	/* store the state (one update = one rotation): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
7018c2ecf20Sopenharmony_ci
/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Run the seven finalization updates with the length block and xor the
 * five resulting state words into the 16 bytes at 'tag_xor'.  The state
 * in memory is only read, never written back.
 *
 *   %rdi (STATEP) - state, read
 *   %rsi          - tag_xor, read and written
 *   %rdx          - assoclen in bytes
 *   %rcx          - cryptlen in bytes
 */
SYM_FUNC_START(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * prepare length block: low qword = assoclen, high qword = cryptlen,
	 * each multiplied by 8 to turn the byte count into a bit count
	 */
	movq %rdx, MSG
	movq %rcx, T0
	punpcklqdq T0, MSG
	psllq $3, MSG

	pxor STATE3, MSG

	/* update state: */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* xor tag: fold all five state words into the caller's buffer */
	movdqu (%rsi), T0

	pxor STATE0, T0
	pxor STATE1, T0
	pxor STATE2, T0
	pxor STATE3, T0
	pxor STATE4, T0

	movdqu T0, (%rsi)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_final)
748