/*
 * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see http://www.gnu.org/licenses
 *
 * Please  visit http://www.xyratex.com/contact if you need additional
 * information or have any questions.
 *
 * GPL HEADER END
 */

/*
 * Copyright 2012 Xyratex Technology Limited
 *
 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
 * calculation.
 * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
 * at:
 * https://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2B: Instruction Set Reference, N-Z
 *
 * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
 *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align		6
	.arch		armv8-a
	.arch_extension	crc
	.fpu		crypto-neon-fp-armv8

/*
 * Constant table for CRC32.  The PMULL routine below addresses it
 * relative to r3 with fixed offsets:
 *
 *   +0 / +8    fold-by-64-bytes multipliers (loaded as one q register)
 *   +16 / +24  fold-by-16-bytes multipliers
 *   +32        multiplier for the final 32-bit fold
 *   +40        0xFFFFFFFF mask selecting the low 32 bits
 *   +48 / +56  polynomial and Barrett reduction constant
 */
.Lcrc32_constants:
	/*
	 * [(x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
	 * #define CONSTANT_R1  0x154442bd4LL
	 *
	 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
	 * #define CONSTANT_R2  0x1c6e41596LL
	 */
	.quad		0x0000000154442bd4
	.quad		0x00000001c6e41596

	/*
	 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
	 * #define CONSTANT_R3  0x1751997d0LL
	 *
	 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
	 * #define CONSTANT_R4  0x0ccaa009eLL
	 */
	.quad		0x00000001751997d0
	.quad		0x00000000ccaa009e

	/*
	 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
	 * #define CONSTANT_R5  0x163cd6124LL
	 */
	.quad		0x0000000163cd6124
	.quad		0x00000000FFFFFFFF

	/*
	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
	 *
	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
	 *                                                      = 0x1F7011641LL
	 * #define CONSTANT_RU  0x1F7011641LL
	 */
	.quad		0x00000001DB710641
	.quad		0x00000001F7011641

/*
 * Constant table for CRC32C.  It is consumed by the same code as
 * .Lcrc32_constants, so its eight entries sit at the same offsets and
 * play the same roles as the entries of that table.
 */
.Lcrc32c_constants:
	.quad		0x00000000740eef02
	.quad		0x000000009e4addf8
	.quad		0x00000000f20c0dfe
	.quad		0x000000014cd00bd6
	.quad		0x00000000dd45aab8
	.quad		0x00000000FFFFFFFF
	.quad		0x0000000105ec76f0
	.quad		0x00000000dea713f1

	/* q0 (= d0:d1) holds the current pair of folding multipliers */
	dCONSTANTl	.req	d0
	dCONSTANTh	.req	d1
	qCONSTANT	.req	q0

	/* incoming arguments of the PMULL routines */
	BUF		.req	r0
	LEN		.req	r1
	CRC		.req	r2

	/* all-zero vector, used as filler operand for vext */
	qzr		.req	q9

	/**
	 * Calculate crc32 (crc32_pmull_le) or crc32c (crc32c_pmull_le)
	 * using 64x64 bit polynomial multiplication.
	 *
	 * BUF - buffer; it is loaded with a :128 alignment qualifier, so it
	 *       has to be 16 byte aligned
	 * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63;
	 *       the first 64 bytes are loaded before LEN is looked at, and
	 *       the low four bits of LEN are ignored
	 * CRC - initial crc32
	 * return the crc32 in r0
	 * uint crc32_pmull_le(unsigned char const *buffer,
	 *                     size_t len, uint crc32)
	 *
	 * The two entry points only differ in the constant table that r3
	 * points to.  Uses r3, q0-q9 and the condition flags.
	 */
ENTRY(crc32_pmull_le)
	adr		r3, .Lcrc32_constants
	b		0f			/* skip the crc32c table setup */

ENTRY(crc32c_pmull_le)
	adr		r3, .Lcrc32c_constants

0:	bic		LEN, LEN, #15		/* round down to 16 bytes */
	vld1.8		{q1-q2}, [BUF, :128]!	/* first 64 bytes of input */
	vld1.8		{q3-q4}, [BUF, :128]!
	vmov.i8		qzr, #0
	vmov.i8		qCONSTANT, #0
	vmov.32		dCONSTANTl[0], CRC
	veor.8		d2, d2, dCONSTANTl	/* mix CRC into first 4 bytes */
	sub		LEN, LEN, #0x40		/* 64 bytes consumed so far */
	cmp		LEN, #0x40
	blt		less_64			/* no further full 64 bytes */

	vld1.64		{qCONSTANT}, [r3]	/* table entries +0 and +8 */

loop_64:		/* 64 bytes Full cache line folding */
	sub		LEN, LEN, #0x40

	/* high halves of the four accumulators times the high constant */
	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q6, d5, dCONSTANTh
	vmull.p64	q7, d7, dCONSTANTh
	vmull.p64	q8, d9, dCONSTANTh

	/* low halves times the low constant, replacing the accumulators */
	vmull.p64	q1, d2, dCONSTANTl
	vmull.p64	q2, d4, dCONSTANTl
	vmull.p64	q3, d6, dCONSTANTl
	vmull.p64	q4, d8, dCONSTANTl

	/* combine both products; q5-q8 are reused for the next 64 bytes */
	veor.8		q1, q1, q5
	vld1.8		{q5}, [BUF, :128]!
	veor.8		q2, q2, q6
	vld1.8		{q6}, [BUF, :128]!
	veor.8		q3, q3, q7
	vld1.8		{q7}, [BUF, :128]!
	veor.8		q4, q4, q8
	vld1.8		{q8}, [BUF, :128]!

	/* add the freshly loaded input */
	veor.8		q1, q1, q5
	veor.8		q2, q2, q6
	veor.8		q3, q3, q7
	veor.8		q4, q4, q8

	cmp		LEN, #0x40
	bge		loop_64

less_64:		/* Folding cache line into 128bit */
	vldr		dCONSTANTl, [r3, #16]
	vldr		dCONSTANTh, [r3, #24]

	/* fold q1 into q2 */
	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q2

	/* fold the result into q3 */
	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q3

	/* fold the result into q4; q1 now holds the single accumulator */
	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q4

	teq		LEN, #0
	beq		fold_64			/* no 16 byte blocks left */

loop_16:		/* Folding rest buffer into 128bit */
	subs		LEN, LEN, #0x10		/* flags are used by bne below */

	vld1.8		{q2}, [BUF, :128]!
	vmull.p64	q5, d3, dCONSTANTh
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q5
	veor.8		q1, q1, q2

	bne		loop_16

fold_64:
	/* perform the last 64 bit fold, also adds 32 zeroes
	 * to the input stream */
	vmull.p64	q2, d2, dCONSTANTh
	vext.8		q1, q1, qzr, #8
	veor.8		q1, q1, q2

	/* final 32-bit fold */
	vldr		dCONSTANTl, [r3, #32]
	vldr		d6, [r3, #40]		/* q3 = low 32 bit mask ... */
	vmov.i8		d7, #0			/* ... with a zero upper half */

	vext.8		q2, q1, qzr, #4
	vand.8		d2, d2, d6
	vmull.p64	q1, d2, dCONSTANTl
	veor.8		q1, q1, q2

	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
	vldr		dCONSTANTl, [r3, #48]
	vldr		dCONSTANTh, [r3, #56]

	vand.8		q2, q1, q3
	vext.8		q2, qzr, q2, #8
	vmull.p64	q2, d5, dCONSTANTh
	vand.8		q2, q2, q3
	vmull.p64	q2, d4, dCONSTANTl
	veor.8		q1, q1, q2
	vmov		r0, s5			/* result: bits 63:32 of q1 */

	bx		lr
ENDPROC(crc32_pmull_le)
ENDPROC(crc32c_pmull_le)

	/*
	 * __crc32 - body of a CRC routine built on the scalar CRC32
	 * instructions.
	 *
	 * \c is appended to the crc32 mnemonics, so an empty argument
	 * emits crc32{b,h,w} and "c" emits crc32c{b,h,w}.  It is also
	 * appended to the labels to keep them unique per expansion.
	 *
	 * In:   r0 = crc accumulator
	 *       r1 = address of the data
	 *       r2 = number of bytes
	 * Out:  r0 = updated crc
	 * Uses: r1 (advanced), r2, r3, ip and the condition flags
	 */
	.macro		__crc32, c
	subs		ip, r2, #8		/* ip = bytes left - 8 */
	bmi		.Ltail\c		/* fewer than 8 bytes */

	tst		r1, #3
	bne		.Lunaligned\c		/* not 4 byte aligned */

	/*
	 * Z is set here, or by the subs at the end of the loop, when
	 * exactly 8 bytes are left.  The bxeq below depends on that flag
	 * still being intact after the loads and crc32 instructions.
	 */
	teq		ip, #0
.Laligned8\c:
	ldrd		r2, r3, [r1], #8
ARM_BE8(rev		r2, r2		)
ARM_BE8(rev		r3, r3		)
	crc32\c\()w	r0, r0, r2
	crc32\c\()w	r0, r0, r3
	bxeq		lr			/* those were the last 8 bytes */
	subs		ip, ip, #8
	bpl		.Laligned8\c

	/*
	 * ip is negative here, but subtracting multiples of 8 has left
	 * its low three bits equal to the number of bytes (0-7) still to
	 * be processed.
	 */
.Ltail\c:
	tst		ip, #4
	beq		2f
	ldr		r3, [r1], #4
ARM_BE8(rev		r3, r3		)
	crc32\c\()w	r0, r0, r3

2:	tst		ip, #2
	beq		1f
	ldrh		r3, [r1], #2
ARM_BE8(rev16		r3, r3		)
	crc32\c\()h	r0, r0, r3

1:	tst		ip, #1
	bxeq		lr
	ldrb		r3, [r1]
	crc32\c\()b	r0, r0, r3
	bx		lr

	/*
	 * Only reached with at least 8 bytes of input: consume a byte
	 * and/or a halfword until r1 is 4 byte aligned, then recompute ip
	 * and continue with the aligned loop or the tail.
	 */
.Lunaligned\c:
	tst		r1, #1
	beq		2f			/* even address: halfword only */
	ldrb		r3, [r1], #1
	subs		r2, r2, #1
	crc32\c\()b	r0, r0, r3

	tst		r1, #2
	beq		0f			/* already 4 byte aligned */
2:	ldrh		r3, [r1], #2
	subs		r2, r2, #2
ARM_BE8(rev16		r3, r3		)
	crc32\c\()h	r0, r0, r3

0:	subs		ip, r2, #8
	bpl		.Laligned8\c
	b		.Ltail\c
	.endm

	/*
	 * CRC32 using the scalar crc32{b,h,w} instructions.
	 * r0 = crc, r1 = data, r2 = length in bytes; result in r0.
	 */
	.align		5
ENTRY(crc32_armv8_le)
	__crc32
ENDPROC(crc32_armv8_le)

	/*
	 * CRC32C using the scalar crc32c{b,h,w} instructions.
	 * r0 = crc, r1 = data, r2 = length in bytes; result in r0.
	 */
	.align		5
ENTRY(crc32c_armv8_le)
	__crc32		c
ENDPROC(crc32c_armv8_le)