/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains accelerated part of ghash
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 */

#include <linux/linkage.h>
#include <asm/frame.h>

/*
 * pshufb control mask: destination byte i is taken from source byte 15 - i,
 * i.e. the 16 bytes of the register are reversed. The constant is 16-byte
 * aligned, so it may be loaded with movaps.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* Register roles shared by every routine in this file. */
#define DATA	%xmm0
#define SHASH	%xmm1
#define T1	%xmm2
#define T2	%xmm3
#define T3	%xmm4
#define BSWAP	%xmm5
#define IN1	%xmm6

.text

/*
 * __clmul_gf128mul_ble:	internal ABI
 * input:
 *	DATA:			operand1
 *	SHASH:			operand2, hash_key << 1 mod poly
 * output:
 *	DATA:			operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * SHASH, BSWAP and IN1 are left untouched, and no general purpose register
 * is used, so callers may keep state in them across the call.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	/*
	 * Karatsuba: three 64x64 carry-less multiplications instead of four.
	 * pshufd $0b01001110 swaps the two 64-bit halves, so after the pxor
	 * both halves of T2 hold a1 + a0 and both halves of T3 hold b1 + b0.
	 */
	movaps DATA, T1
	pshufd $0b01001110, DATA, T2
	pshufd $0b01001110, SHASH, T3
	pxor DATA, T2
	pxor SHASH, T3

	pclmulqdq $0x00, SHASH, DATA	# DATA = a0 * b0
	pclmulqdq $0x11, SHASH, T1	# T1 = a1 * b1
	pclmulqdq $0x00, T3, T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor DATA, T2
	pxor T1, T2			# T2 = a0 * b1 + a1 * b0

	/* Fold the middle term into the 256-bit product at bit offset 64. */
	movaps T2, T3
	pslldq $8, T3
	psrldq $8, T2
	pxor T3, DATA
	pxor T2, T1			# <T1:DATA> is result of
					# carry-less multiplication

	# first phase of the reduction
	/* Per 64-bit lane: T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57) */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	/* Apply T3 shifted left by 64 bits across the pair <T1:DATA>. */
	movaps T3, T2
	pslldq $8, T2
	psrldq $8, T3
	pxor T2, DATA
	pxor T3, T1

	# second phase of the reduction
	/* Per 64-bit lane: T2 = (DATA >> 7) ^ (DATA >> 2) ^ (DATA >> 1) */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA			# DATA = reduced 128-bit result
	RET
SYM_FUNC_END(__clmul_gf128mul_ble)

/*
 * void clmul_ghash_mul(char *dst, const le128 *shash)
 *
 * In:	%rdi = dst, 16 bytes, read and then overwritten (unaligned is fine)
 *	%rsi = shash, 16 bytes, read only (unaligned is fine)
 *
 * Loads the 16 bytes at dst, reverses their byte order, multiplies them by
 * *shash with __clmul_gf128mul_ble, reverses the byte order of the product
 * and stores it back to dst.
 *
 * Modifies: DATA, SHASH, T1, T2, T3, BSWAP.
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask(%rip), BSWAP
	pshufb BSWAP, DATA
	call __clmul_gf128mul_ble
	pshufb BSWAP, DATA
	movups DATA, (%rdi)
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_mul)

/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const le128 *shash);
 *
 * In:	%rdi = dst, 16-byte running value, read and then overwritten
 *	%rsi = src, input bytes
 *	%edx = srclen, number of input bytes
 *	%rcx = shash, 16 bytes, read only
 *
 * For every complete 16-byte block of src:
 *	dst = (dst ^ block) * shash
 * with the multiplication done by __clmul_gf128mul_ble on byte-reversed
 * values. A trailing partial block (srclen % 16 bytes) is not consumed.
 * When srclen < 16 nothing is loaded or stored and dst is left unchanged.
 *
 * srclen is a 32-bit unsigned int, so only %edx is examined: the calling
 * convention does not define bits 63:32 of %rdx for a 32-bit argument.
 * The length is unsigned, hence the unsigned conditions (jb / jae).
 *
 * Modifies: %rsi, %rdx, flags, DATA, SHASH, T1, T2, T3, BSWAP, IN1.
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	cmp $16, %edx
	jb .Lupdate_just_ret	# check length
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	pshufb BSWAP, DATA
.align 4
.Lupdate_loop:
	movups (%rsi), IN1
	pshufb BSWAP, IN1
	pxor IN1, DATA
	call __clmul_gf128mul_ble	# keeps SHASH, BSWAP, %rsi, %rdx intact
	sub $16, %edx			# one block consumed
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop		# another complete block left
	pshufb BSWAP, DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_update)