/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2012 Xyratex Technology Limited
 *
 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
 * calculation.
 * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
 * at:
 * http://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2B: Instruction Set Reference, N-Z
 *
 * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
 *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
 */

#include <linux/linkage.h>


.section .rodata
.align 16
/*
 * Folding constants.  Each .octa packs two 64-bit multiplicands; the
 * pclmulqdq immediate selects which half is used (bit 4 picks the qword
 * of the source operand, bit 0 the qword of the destination operand).
 *
 * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
 * #define CONSTANT_R1 0x154442bd4LL
 *
 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
 * #define CONSTANT_R2 0x1c6e41596LL
 */
.Lconstant_R2R1:
	.octa 0x00000001c6e415960000000154442bd4
/*
 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
 * #define CONSTANT_R3 0x1751997d0LL
 *
 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
 * #define CONSTANT_R4 0x0ccaa009eLL
 */
.Lconstant_R4R3:
	.octa 0x00000000ccaa009e00000001751997d0
/*
 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
 * #define CONSTANT_R5 0x163cd6124LL
 */
.Lconstant_R5:
	.octa 0x00000000000000000000000163cd6124
/* Keeps only the low 32 bits of an xmm register when used with pand. */
.Lconstant_mask32:
	.octa 0x000000000000000000000000FFFFFFFF
/*
 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
 *
 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
 * #define CONSTANT_RU 0x1F7011641LL
 *
 * High qword = RU, low qword = the full polynomial.
 */
.Lconstant_RUpoly:
	.octa 0x00000001F701164100000001DB710641

/* xmm0 always holds the fold constants currently in use. */
#define CONSTANT %xmm0

/*
 * Argument registers.  NOTE(review): the 32-bit mapping (%eax, %edx, %ecx)
 * presumably corresponds to the kernel's regparm(3) convention - confirm
 * against the C prototype in the glue code.
 */
#ifdef __x86_64__
#define BUF     %rdi
#define LEN     %rsi
#define CRC     %edx
#else
#define BUF     %eax
#define LEN     %edx
#define CRC     %ecx
#endif



.text
/**
 *      Calculate crc32
 *      BUF - buffer (16 bytes aligned)
 *      LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
 *      CRC - initial crc32
 *      return %eax crc32
 *      uint crc32_pclmul_le_16(unsigned char const *buffer,
 *	                     size_t len, uint crc32)
 *
 *      Preconditions visible in the code below:
 *        - The first 64 bytes are loaded unconditionally, so LEN must be
 *          at least 64.
 *        - All buffer accesses use movdqa / pxor with a memory operand,
 *          which require 16-byte aligned addresses.
 *        - Data is consumed in 64-byte and then 16-byte steps; a trailing
 *          remainder shorter than 16 bytes is never read.
 *
 *      Register usage:
 *        xmm1-xmm4   four 128-bit accumulators (64 bytes of folded state)
 *        xmm0        current fold constants (CONSTANT)
 *        xmm5-xmm7   scratch copies for the high-qword products
 *        xmm8        extra scratch, x86_64 only
 *      Modifies BUF, LEN, the flags and the xmm registers listed above;
 *      no stack is used.
 */

SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
	/* Load the first 64 bytes and mix the initial CRC into the low dword. */
	movdqa  (BUF), %xmm1
	movdqa  0x10(BUF), %xmm2
	movdqa  0x20(BUF), %xmm3
	movdqa  0x30(BUF), %xmm4
	movd    CRC, CONSTANT
	pxor    CONSTANT, %xmm1
	sub     $0x40, LEN
	add     $0x40, BUF
	cmp     $0x40, LEN
	jb      .Lless_64		/* fewer than 64 bytes left: skip the wide loop */

#ifdef __x86_64__
	movdqa .Lconstant_R2R1(%rip), CONSTANT
#else
	movdqa .Lconstant_R2R1, CONSTANT
#endif

.Lloop_64:/*  64 bytes Full cache line folding */
	prefetchnta    0x40(BUF)
	/* Keep a copy of each accumulator for the high-qword multiply. */
	movdqa  %xmm1, %xmm5
	movdqa  %xmm2, %xmm6
	movdqa  %xmm3, %xmm7
#ifdef __x86_64__
	movdqa  %xmm4, %xmm8
#endif
	/* $0x00: low qword of the accumulator times low qword of CONSTANT (R1) */
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x00, CONSTANT, %xmm2
	pclmulqdq $0x00, CONSTANT, %xmm3
#ifdef __x86_64__
	pclmulqdq $0x00, CONSTANT, %xmm4
#endif
	/* $0x11: high qword of the copy times high qword of CONSTANT (R2) */
	pclmulqdq $0x11, CONSTANT, %xmm5
	pclmulqdq $0x11, CONSTANT, %xmm6
	pclmulqdq $0x11, CONSTANT, %xmm7
#ifdef __x86_64__
	pclmulqdq $0x11, CONSTANT, %xmm8
#endif
	pxor    %xmm5, %xmm1
	pxor    %xmm6, %xmm2
	pxor    %xmm7, %xmm3
#ifdef __x86_64__
	pxor    %xmm8, %xmm4
#else
	/* xmm8 unsupported for x32 */
	/* Fold the fourth lane afterwards, reusing xmm5 as scratch. */
	movdqa  %xmm4, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm4
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm4
#endif

	/* XOR the next 64 input bytes into the folded accumulators. */
	pxor    (BUF), %xmm1
	pxor    0x10(BUF), %xmm2
	pxor    0x20(BUF), %xmm3
	pxor    0x30(BUF), %xmm4

	sub     $0x40, LEN
	add     $0x40, BUF
	cmp     $0x40, LEN
	/* LEN is an unsigned byte count: use the unsigned condition, like jb above. */
	jae     .Lloop_64
.Lless_64:/*  Folding cache line into 128bit */
#ifdef __x86_64__
	movdqa  .Lconstant_R4R3(%rip), CONSTANT
#else
	movdqa  .Lconstant_R4R3, CONSTANT
#endif
	prefetchnta     (BUF)

	/* xmm1 = fold(xmm1) ^ xmm2 */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm2, %xmm1

	/* xmm1 = fold(xmm1) ^ xmm3 */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm3, %xmm1

	/* xmm1 = fold(xmm1) ^ xmm4 */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm4, %xmm1

	cmp     $0x10, LEN
	jb      .Lfold_64		/* no complete 16-byte chunk left */
.Lloop_16:/* Folding rest buffer into 128bit */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    (BUF), %xmm1
	sub     $0x10, LEN
	add     $0x10, BUF
	cmp     $0x10, LEN
	/* Unsigned compare on the byte count, matching the jb entry check. */
	jae     .Lloop_16

.Lfold_64:
	/* perform the last 64 bit fold, also adds 32 zeroes
	 * to the input stream */
	pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
	psrldq  $0x08, %xmm1		/* xmm1 = former high qword */
	pxor    CONSTANT, %xmm1

	/* final 32-bit fold */
	movdqa  %xmm1, %xmm2
#ifdef __x86_64__
	movdqa  .Lconstant_R5(%rip), CONSTANT
	movdqa  .Lconstant_mask32(%rip), %xmm3
#else
	movdqa  .Lconstant_R5, CONSTANT
	movdqa  .Lconstant_mask32, %xmm3
#endif
	psrldq  $0x04, %xmm2		/* xmm2 = value shifted right by 32 bits */
	pand    %xmm3, %xmm1		/* keep the low 32 bits */
	pclmulqdq $0x00, CONSTANT, %xmm1	/* low 32 bits * R5 */
	pxor    %xmm2, %xmm1

	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
	movdqa  .Lconstant_RUpoly(%rip), CONSTANT
#else
	movdqa  .Lconstant_RUpoly, CONSTANT
#endif
	movdqa  %xmm1, %xmm2
	pand    %xmm3, %xmm1
	pclmulqdq $0x10, CONSTANT, %xmm1	/* low 32 bits * RU (high qword of CONSTANT) */
	pand    %xmm3, %xmm1
	pclmulqdq $0x00, CONSTANT, %xmm1	/* low 32 bits * polynomial (low qword) */
	pxor    %xmm2, %xmm1
	pextrd  $0x01, %xmm1, %eax	/* result is dword 1 of xmm1 */

	RET
SYM_FUNC_END(crc32_pclmul_le_16)