162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * arch/arm64/lib/xor-neon.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Authors: Jackie Liu <liuyun01@kylinos.cn> 662306a36Sopenharmony_ci * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include <linux/raid/xor.h> 1062306a36Sopenharmony_ci#include <linux/module.h> 1162306a36Sopenharmony_ci#include <asm/neon-intrinsics.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_cistatic void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, 1462306a36Sopenharmony_ci const unsigned long * __restrict p2) 1562306a36Sopenharmony_ci{ 1662306a36Sopenharmony_ci uint64_t *dp1 = (uint64_t *)p1; 1762306a36Sopenharmony_ci uint64_t *dp2 = (uint64_t *)p2; 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci register uint64x2_t v0, v1, v2, v3; 2062306a36Sopenharmony_ci long lines = bytes / (sizeof(uint64x2_t) * 4); 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci do { 2362306a36Sopenharmony_ci /* p1 ^= p2 */ 2462306a36Sopenharmony_ci v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 2562306a36Sopenharmony_ci v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 2662306a36Sopenharmony_ci v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 2762306a36Sopenharmony_ci v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci /* store */ 3062306a36Sopenharmony_ci vst1q_u64(dp1 + 0, v0); 3162306a36Sopenharmony_ci vst1q_u64(dp1 + 2, v1); 3262306a36Sopenharmony_ci vst1q_u64(dp1 + 4, v2); 3362306a36Sopenharmony_ci vst1q_u64(dp1 + 6, v3); 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci dp1 += 8; 3662306a36Sopenharmony_ci dp2 += 8; 3762306a36Sopenharmony_ci } while (--lines > 0); 3862306a36Sopenharmony_ci} 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cistatic void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, 4162306a36Sopenharmony_ci const unsigned long * __restrict p2, 4262306a36Sopenharmony_ci const unsigned long * __restrict p3) 4362306a36Sopenharmony_ci{ 4462306a36Sopenharmony_ci uint64_t *dp1 = (uint64_t *)p1; 4562306a36Sopenharmony_ci uint64_t *dp2 = (uint64_t *)p2; 4662306a36Sopenharmony_ci uint64_t *dp3 = (uint64_t *)p3; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci register uint64x2_t v0, v1, v2, v3; 4962306a36Sopenharmony_ci long lines = bytes / (sizeof(uint64x2_t) * 4); 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci do { 5262306a36Sopenharmony_ci /* p1 ^= p2 */ 5362306a36Sopenharmony_ci v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 5462306a36Sopenharmony_ci v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 5562306a36Sopenharmony_ci v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 5662306a36Sopenharmony_ci v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci /* p1 ^= p3 */ 5962306a36Sopenharmony_ci v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); 6062306a36Sopenharmony_ci v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); 6162306a36Sopenharmony_ci v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); 6262306a36Sopenharmony_ci v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci /* store */ 6562306a36Sopenharmony_ci vst1q_u64(dp1 + 0, v0); 6662306a36Sopenharmony_ci vst1q_u64(dp1 + 2, v1); 6762306a36Sopenharmony_ci vst1q_u64(dp1 + 4, v2); 6862306a36Sopenharmony_ci vst1q_u64(dp1 + 6, v3); 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci dp1 += 8; 7162306a36Sopenharmony_ci dp2 += 8; 7262306a36Sopenharmony_ci dp3 += 8; 7362306a36Sopenharmony_ci } while (--lines > 0); 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_cistatic void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, 7762306a36Sopenharmony_ci const unsigned long * __restrict p2, 7862306a36Sopenharmony_ci const unsigned long * __restrict p3, 7962306a36Sopenharmony_ci const unsigned long * __restrict p4) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci uint64_t *dp1 = (uint64_t *)p1; 8262306a36Sopenharmony_ci uint64_t *dp2 = (uint64_t *)p2; 8362306a36Sopenharmony_ci uint64_t *dp3 = (uint64_t *)p3; 8462306a36Sopenharmony_ci uint64_t *dp4 = (uint64_t *)p4; 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci register uint64x2_t v0, v1, v2, v3; 8762306a36Sopenharmony_ci long lines = bytes / (sizeof(uint64x2_t) * 4); 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci do { 9062306a36Sopenharmony_ci /* p1 ^= p2 */ 9162306a36Sopenharmony_ci v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 9262306a36Sopenharmony_ci v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 9362306a36Sopenharmony_ci v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 9462306a36Sopenharmony_ci v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci /* p1 ^= p3 */ 9762306a36Sopenharmony_ci v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); 9862306a36Sopenharmony_ci v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); 9962306a36Sopenharmony_ci v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); 10062306a36Sopenharmony_ci v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci /* p1 ^= p4 */ 10362306a36Sopenharmony_ci v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); 10462306a36Sopenharmony_ci v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); 10562306a36Sopenharmony_ci v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); 10662306a36Sopenharmony_ci v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci /* store */ 10962306a36Sopenharmony_ci vst1q_u64(dp1 + 0, v0); 11062306a36Sopenharmony_ci vst1q_u64(dp1 + 2, v1); 11162306a36Sopenharmony_ci vst1q_u64(dp1 + 4, v2); 11262306a36Sopenharmony_ci vst1q_u64(dp1 + 6, v3); 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci dp1 += 8; 11562306a36Sopenharmony_ci dp2 += 8; 11662306a36Sopenharmony_ci dp3 += 8; 11762306a36Sopenharmony_ci dp4 += 8; 11862306a36Sopenharmony_ci } while (--lines > 0); 11962306a36Sopenharmony_ci} 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_cistatic void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, 12262306a36Sopenharmony_ci const unsigned long * __restrict p2, 12362306a36Sopenharmony_ci const unsigned long * __restrict p3, 12462306a36Sopenharmony_ci const unsigned long * __restrict p4, 12562306a36Sopenharmony_ci const unsigned long * __restrict p5) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci uint64_t *dp1 = (uint64_t *)p1; 12862306a36Sopenharmony_ci uint64_t *dp2 = (uint64_t *)p2; 12962306a36Sopenharmony_ci uint64_t *dp3 = (uint64_t *)p3; 13062306a36Sopenharmony_ci uint64_t *dp4 = (uint64_t *)p4; 13162306a36Sopenharmony_ci uint64_t *dp5 = (uint64_t *)p5; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci register uint64x2_t v0, v1, v2, v3; 13462306a36Sopenharmony_ci long lines = bytes / (sizeof(uint64x2_t) * 4); 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci do { 13762306a36Sopenharmony_ci /* p1 ^= p2 */ 13862306a36Sopenharmony_ci v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); 13962306a36Sopenharmony_ci v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); 14062306a36Sopenharmony_ci v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); 14162306a36Sopenharmony_ci v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci /* p1 ^= p3 */ 14462306a36Sopenharmony_ci v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); 14562306a36Sopenharmony_ci v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); 14662306a36Sopenharmony_ci v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); 14762306a36Sopenharmony_ci v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci /* p1 ^= p4 */ 15062306a36Sopenharmony_ci v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); 15162306a36Sopenharmony_ci v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); 15262306a36Sopenharmony_ci v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); 15362306a36Sopenharmony_ci v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci /* p1 ^= p5 */ 15662306a36Sopenharmony_ci v0 = veorq_u64(v0, vld1q_u64(dp5 + 0)); 15762306a36Sopenharmony_ci v1 = veorq_u64(v1, vld1q_u64(dp5 + 2)); 15862306a36Sopenharmony_ci v2 = veorq_u64(v2, vld1q_u64(dp5 + 4)); 15962306a36Sopenharmony_ci v3 = veorq_u64(v3, vld1q_u64(dp5 + 6)); 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci /* store */ 16262306a36Sopenharmony_ci vst1q_u64(dp1 + 0, v0); 16362306a36Sopenharmony_ci vst1q_u64(dp1 + 2, v1); 16462306a36Sopenharmony_ci vst1q_u64(dp1 + 4, v2); 16562306a36Sopenharmony_ci vst1q_u64(dp1 + 6, v3); 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci dp1 += 8; 16862306a36Sopenharmony_ci dp2 += 8; 16962306a36Sopenharmony_ci dp3 += 8; 17062306a36Sopenharmony_ci dp4 += 8; 17162306a36Sopenharmony_ci dp5 += 8; 17262306a36Sopenharmony_ci } while (--lines > 0); 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_cistruct xor_block_template xor_block_inner_neon __ro_after_init = { 17662306a36Sopenharmony_ci .name = "__inner_neon__", 17762306a36Sopenharmony_ci .do_2 = xor_arm64_neon_2, 17862306a36Sopenharmony_ci .do_3 = xor_arm64_neon_3, 17962306a36Sopenharmony_ci .do_4 = xor_arm64_neon_4, 18062306a36Sopenharmony_ci .do_5 = xor_arm64_neon_5, 18162306a36Sopenharmony_ci}; 18262306a36Sopenharmony_ciEXPORT_SYMBOL(xor_block_inner_neon); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_cistatic inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) 18562306a36Sopenharmony_ci{ 18662306a36Sopenharmony_ci uint64x2_t res; 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n" 18962306a36Sopenharmony_ci "eor3 %0.16b, %1.16b, %2.16b, %3.16b" 19062306a36Sopenharmony_ci : "=w"(res) : "w"(p), "w"(q), "w"(r)); 19162306a36Sopenharmony_ci return res; 19262306a36Sopenharmony_ci} 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_cistatic void xor_arm64_eor3_3(unsigned long bytes, 19562306a36Sopenharmony_ci unsigned long * __restrict p1, 19662306a36Sopenharmony_ci const unsigned long * __restrict p2, 19762306a36Sopenharmony_ci const unsigned long * __restrict p3) 19862306a36Sopenharmony_ci{ 19962306a36Sopenharmony_ci uint64_t *dp1 = (uint64_t *)p1; 20062306a36Sopenharmony_ci uint64_t *dp2 = (uint64_t *)p2; 20162306a36Sopenharmony_ci uint64_t *dp3 = (uint64_t *)p3; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci register uint64x2_t v0, v1, v2, v3; 20462306a36Sopenharmony_ci long lines = bytes / (sizeof(uint64x2_t) * 4); 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci do { 20762306a36Sopenharmony_ci /* p1 ^= p2 ^ p3 */ 20862306a36Sopenharmony_ci v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0), 20962306a36Sopenharmony_ci vld1q_u64(dp3 + 0)); 21062306a36Sopenharmony_ci v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2), 21162306a36Sopenharmony_ci vld1q_u64(dp3 + 2)); 21262306a36Sopenharmony_ci v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4), 21362306a36Sopenharmony_ci vld1q_u64(dp3 + 4)); 21462306a36Sopenharmony_ci v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6), 21562306a36Sopenharmony_ci vld1q_u64(dp3 + 6)); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci /* store */ 21862306a36Sopenharmony_ci vst1q_u64(dp1 + 0, v0); 21962306a36Sopenharmony_ci vst1q_u64(dp1 + 2, v1); 22062306a36Sopenharmony_ci vst1q_u64(dp1 + 4, v2); 22162306a36Sopenharmony_ci vst1q_u64(dp1 + 6, v3); 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci dp1 += 8; 22462306a36Sopenharmony_ci dp2 += 8; 22562306a36Sopenharmony_ci dp3 += 8; 22662306a36Sopenharmony_ci } while (--lines > 0); 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_cistatic void xor_arm64_eor3_4(unsigned long bytes, 23062306a36Sopenharmony_ci unsigned long * __restrict p1, 23162306a36Sopenharmony_ci const unsigned long * __restrict p2, 23262306a36Sopenharmony_ci const unsigned long * __restrict p3, 23362306a36Sopenharmony_ci const unsigned long * __restrict p4) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci uint64_t *dp1 = (uint64_t *)p1; 23662306a36Sopenharmony_ci uint64_t *dp2 = (uint64_t *)p2; 23762306a36Sopenharmony_ci uint64_t *dp3 = (uint64_t *)p3; 23862306a36Sopenharmony_ci uint64_t *dp4 = (uint64_t *)p4; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci register uint64x2_t v0, v1, v2, v3; 24162306a36Sopenharmony_ci long lines = bytes / (sizeof(uint64x2_t) * 4); 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci do { 24462306a36Sopenharmony_ci /* p1 ^= p2 ^ p3 */ 24562306a36Sopenharmony_ci v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0), 24662306a36Sopenharmony_ci vld1q_u64(dp3 + 0)); 24762306a36Sopenharmony_ci v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2), 24862306a36Sopenharmony_ci vld1q_u64(dp3 + 2)); 24962306a36Sopenharmony_ci v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4), 25062306a36Sopenharmony_ci vld1q_u64(dp3 + 4)); 25162306a36Sopenharmony_ci v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6), 25262306a36Sopenharmony_ci vld1q_u64(dp3 + 6)); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci /* p1 ^= p4 */ 25562306a36Sopenharmony_ci v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); 25662306a36Sopenharmony_ci v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); 25762306a36Sopenharmony_ci v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); 25862306a36Sopenharmony_ci v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci /* store */ 26162306a36Sopenharmony_ci vst1q_u64(dp1 + 0, v0); 26262306a36Sopenharmony_ci vst1q_u64(dp1 + 2, v1); 26362306a36Sopenharmony_ci vst1q_u64(dp1 + 4, v2); 26462306a36Sopenharmony_ci vst1q_u64(dp1 + 6, v3); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci dp1 += 8; 26762306a36Sopenharmony_ci dp2 += 8; 26862306a36Sopenharmony_ci dp3 += 8; 26962306a36Sopenharmony_ci dp4 += 8; 27062306a36Sopenharmony_ci } while (--lines > 0); 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_cistatic void xor_arm64_eor3_5(unsigned long bytes, 27462306a36Sopenharmony_ci unsigned long * __restrict p1, 27562306a36Sopenharmony_ci const unsigned long * __restrict p2, 27662306a36Sopenharmony_ci const unsigned long * __restrict p3, 27762306a36Sopenharmony_ci const unsigned long * __restrict p4, 27862306a36Sopenharmony_ci const unsigned long * __restrict p5) 27962306a36Sopenharmony_ci{ 28062306a36Sopenharmony_ci uint64_t *dp1 = (uint64_t *)p1; 28162306a36Sopenharmony_ci uint64_t *dp2 = (uint64_t *)p2; 28262306a36Sopenharmony_ci uint64_t *dp3 = (uint64_t *)p3; 28362306a36Sopenharmony_ci uint64_t *dp4 = (uint64_t *)p4; 28462306a36Sopenharmony_ci uint64_t *dp5 = (uint64_t *)p5; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci register uint64x2_t v0, v1, v2, v3; 28762306a36Sopenharmony_ci long lines = bytes / (sizeof(uint64x2_t) * 4); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci do { 29062306a36Sopenharmony_ci /* p1 ^= p2 ^ p3 */ 29162306a36Sopenharmony_ci v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0), 29262306a36Sopenharmony_ci vld1q_u64(dp3 + 0)); 29362306a36Sopenharmony_ci v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2), 29462306a36Sopenharmony_ci vld1q_u64(dp3 + 2)); 29562306a36Sopenharmony_ci v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4), 29662306a36Sopenharmony_ci vld1q_u64(dp3 + 4)); 29762306a36Sopenharmony_ci v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6), 29862306a36Sopenharmony_ci vld1q_u64(dp3 + 6)); 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci /* p1 ^= p4 ^ p5 */ 30162306a36Sopenharmony_ci v0 = eor3(v0, vld1q_u64(dp4 + 0), vld1q_u64(dp5 + 0)); 30262306a36Sopenharmony_ci v1 = eor3(v1, vld1q_u64(dp4 + 2), vld1q_u64(dp5 + 2)); 30362306a36Sopenharmony_ci v2 = eor3(v2, vld1q_u64(dp4 + 4), vld1q_u64(dp5 + 4)); 30462306a36Sopenharmony_ci v3 = eor3(v3, vld1q_u64(dp4 + 6), vld1q_u64(dp5 + 6)); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci /* store */ 30762306a36Sopenharmony_ci vst1q_u64(dp1 + 0, v0); 30862306a36Sopenharmony_ci vst1q_u64(dp1 + 2, v1); 30962306a36Sopenharmony_ci vst1q_u64(dp1 + 4, v2); 31062306a36Sopenharmony_ci vst1q_u64(dp1 + 6, v3); 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci dp1 += 8; 31362306a36Sopenharmony_ci dp2 += 8; 31462306a36Sopenharmony_ci dp3 += 8; 31562306a36Sopenharmony_ci dp4 += 8; 31662306a36Sopenharmony_ci dp5 += 8; 31762306a36Sopenharmony_ci } while (--lines > 0); 31862306a36Sopenharmony_ci} 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_cistatic int __init xor_neon_init(void) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_AS_HAS_SHA3) && cpu_have_named_feature(SHA3)) { 32362306a36Sopenharmony_ci xor_block_inner_neon.do_3 = xor_arm64_eor3_3; 32462306a36Sopenharmony_ci xor_block_inner_neon.do_4 = xor_arm64_eor3_4; 32562306a36Sopenharmony_ci xor_block_inner_neon.do_5 = xor_arm64_eor3_5; 32662306a36Sopenharmony_ci } 32762306a36Sopenharmony_ci return 0; 32862306a36Sopenharmony_ci} 32962306a36Sopenharmony_cimodule_init(xor_neon_init); 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_cistatic void __exit xor_neon_exit(void) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci} 33462306a36Sopenharmony_cimodule_exit(xor_neon_exit); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ciMODULE_AUTHOR("Jackie Liu <liuyun01@kylinos.cn>"); 33762306a36Sopenharmony_ciMODULE_DESCRIPTION("ARMv8 XOR Extensions"); 33862306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 339