162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */ 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * arch/arm/include/asm/xor.h 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2001 Russell King 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci#include <linux/hardirq.h> 862306a36Sopenharmony_ci#include <asm-generic/xor.h> 962306a36Sopenharmony_ci#include <asm/hwcap.h> 1062306a36Sopenharmony_ci#include <asm/neon.h> 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#define __XOR(a1, a2) a1 ^= a2 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#define GET_BLOCK_2(dst) \ 1562306a36Sopenharmony_ci __asm__("ldmia %0, {%1, %2}" \ 1662306a36Sopenharmony_ci : "=r" (dst), "=r" (a1), "=r" (a2) \ 1762306a36Sopenharmony_ci : "0" (dst)) 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#define GET_BLOCK_4(dst) \ 2062306a36Sopenharmony_ci __asm__("ldmia %0, {%1, %2, %3, %4}" \ 2162306a36Sopenharmony_ci : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \ 2262306a36Sopenharmony_ci : "0" (dst)) 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#define XOR_BLOCK_2(src) \ 2562306a36Sopenharmony_ci __asm__("ldmia %0!, {%1, %2}" \ 2662306a36Sopenharmony_ci : "=r" (src), "=r" (b1), "=r" (b2) \ 2762306a36Sopenharmony_ci : "0" (src)); \ 2862306a36Sopenharmony_ci __XOR(a1, b1); __XOR(a2, b2); 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci#define XOR_BLOCK_4(src) \ 3162306a36Sopenharmony_ci __asm__("ldmia %0!, {%1, %2, %3, %4}" \ 3262306a36Sopenharmony_ci : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \ 3362306a36Sopenharmony_ci : "0" (src)); \ 3462306a36Sopenharmony_ci __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4) 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci#define PUT_BLOCK_2(dst) \ 3762306a36Sopenharmony_ci __asm__ __volatile__("stmia %0!, {%2, %3}" \ 3862306a36Sopenharmony_ci : "=r" (dst) \ 3962306a36Sopenharmony_ci : "0" (dst), "r" (a1), "r" (a2)) 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#define PUT_BLOCK_4(dst) \ 4262306a36Sopenharmony_ci __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \ 4362306a36Sopenharmony_ci : "=r" (dst) \ 4462306a36Sopenharmony_ci : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic void 4762306a36Sopenharmony_cixor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1, 4862306a36Sopenharmony_ci const unsigned long * __restrict p2) 4962306a36Sopenharmony_ci{ 5062306a36Sopenharmony_ci unsigned int lines = bytes / sizeof(unsigned long) / 4; 5162306a36Sopenharmony_ci register unsigned int a1 __asm__("r4"); 5262306a36Sopenharmony_ci register unsigned int a2 __asm__("r5"); 5362306a36Sopenharmony_ci register unsigned int a3 __asm__("r6"); 5462306a36Sopenharmony_ci register unsigned int a4 __asm__("r10"); 5562306a36Sopenharmony_ci register unsigned int b1 __asm__("r8"); 5662306a36Sopenharmony_ci register unsigned int b2 __asm__("r9"); 5762306a36Sopenharmony_ci register unsigned int b3 __asm__("ip"); 5862306a36Sopenharmony_ci register unsigned int b4 __asm__("lr"); 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci do { 6162306a36Sopenharmony_ci GET_BLOCK_4(p1); 6262306a36Sopenharmony_ci XOR_BLOCK_4(p2); 6362306a36Sopenharmony_ci PUT_BLOCK_4(p1); 6462306a36Sopenharmony_ci } while (--lines); 6562306a36Sopenharmony_ci} 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistatic void 6862306a36Sopenharmony_cixor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1, 6962306a36Sopenharmony_ci const unsigned long * __restrict p2, 7062306a36Sopenharmony_ci const unsigned long * __restrict p3) 7162306a36Sopenharmony_ci{ 7262306a36Sopenharmony_ci unsigned int lines = bytes / sizeof(unsigned long) / 4; 7362306a36Sopenharmony_ci register unsigned int a1 __asm__("r4"); 7462306a36Sopenharmony_ci register unsigned int a2 __asm__("r5"); 7562306a36Sopenharmony_ci register unsigned int a3 __asm__("r6"); 7662306a36Sopenharmony_ci register unsigned int a4 __asm__("r10"); 7762306a36Sopenharmony_ci register unsigned int b1 __asm__("r8"); 7862306a36Sopenharmony_ci register unsigned int b2 __asm__("r9"); 7962306a36Sopenharmony_ci register unsigned int b3 __asm__("ip"); 8062306a36Sopenharmony_ci register unsigned int b4 __asm__("lr"); 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci do { 8362306a36Sopenharmony_ci GET_BLOCK_4(p1); 8462306a36Sopenharmony_ci XOR_BLOCK_4(p2); 8562306a36Sopenharmony_ci XOR_BLOCK_4(p3); 8662306a36Sopenharmony_ci PUT_BLOCK_4(p1); 8762306a36Sopenharmony_ci } while (--lines); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cistatic void 9162306a36Sopenharmony_cixor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1, 9262306a36Sopenharmony_ci const unsigned long * __restrict p2, 9362306a36Sopenharmony_ci const unsigned long * __restrict p3, 9462306a36Sopenharmony_ci const unsigned long * __restrict p4) 9562306a36Sopenharmony_ci{ 9662306a36Sopenharmony_ci unsigned int lines = bytes / sizeof(unsigned long) / 2; 9762306a36Sopenharmony_ci register unsigned int a1 __asm__("r8"); 9862306a36Sopenharmony_ci register unsigned int a2 __asm__("r9"); 9962306a36Sopenharmony_ci register unsigned int b1 __asm__("ip"); 10062306a36Sopenharmony_ci register unsigned int b2 __asm__("lr"); 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci do { 10362306a36Sopenharmony_ci GET_BLOCK_2(p1); 10462306a36Sopenharmony_ci XOR_BLOCK_2(p2); 10562306a36Sopenharmony_ci XOR_BLOCK_2(p3); 10662306a36Sopenharmony_ci XOR_BLOCK_2(p4); 10762306a36Sopenharmony_ci PUT_BLOCK_2(p1); 10862306a36Sopenharmony_ci } while (--lines); 10962306a36Sopenharmony_ci} 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_cistatic void 11262306a36Sopenharmony_cixor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1, 11362306a36Sopenharmony_ci const unsigned long * __restrict p2, 11462306a36Sopenharmony_ci const unsigned long * __restrict p3, 11562306a36Sopenharmony_ci const unsigned long * __restrict p4, 11662306a36Sopenharmony_ci const unsigned long * __restrict p5) 11762306a36Sopenharmony_ci{ 11862306a36Sopenharmony_ci unsigned int lines = bytes / sizeof(unsigned long) / 2; 11962306a36Sopenharmony_ci register unsigned int a1 __asm__("r8"); 12062306a36Sopenharmony_ci register unsigned int a2 __asm__("r9"); 12162306a36Sopenharmony_ci register unsigned int b1 __asm__("ip"); 12262306a36Sopenharmony_ci register unsigned int b2 __asm__("lr"); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci do { 12562306a36Sopenharmony_ci GET_BLOCK_2(p1); 12662306a36Sopenharmony_ci XOR_BLOCK_2(p2); 12762306a36Sopenharmony_ci XOR_BLOCK_2(p3); 12862306a36Sopenharmony_ci XOR_BLOCK_2(p4); 12962306a36Sopenharmony_ci XOR_BLOCK_2(p5); 13062306a36Sopenharmony_ci PUT_BLOCK_2(p1); 13162306a36Sopenharmony_ci } while (--lines); 13262306a36Sopenharmony_ci} 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_cistatic struct xor_block_template xor_block_arm4regs = { 13562306a36Sopenharmony_ci .name = "arm4regs", 13662306a36Sopenharmony_ci .do_2 = xor_arm4regs_2, 13762306a36Sopenharmony_ci .do_3 = xor_arm4regs_3, 13862306a36Sopenharmony_ci .do_4 = xor_arm4regs_4, 13962306a36Sopenharmony_ci .do_5 = xor_arm4regs_5, 14062306a36Sopenharmony_ci}; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci#undef XOR_TRY_TEMPLATES 14362306a36Sopenharmony_ci#define XOR_TRY_TEMPLATES \ 14462306a36Sopenharmony_ci do { \ 14562306a36Sopenharmony_ci xor_speed(&xor_block_arm4regs); \ 14662306a36Sopenharmony_ci xor_speed(&xor_block_8regs); \ 14762306a36Sopenharmony_ci xor_speed(&xor_block_32regs); \ 14862306a36Sopenharmony_ci NEON_TEMPLATES; \ 14962306a36Sopenharmony_ci } while (0) 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci#ifdef CONFIG_KERNEL_MODE_NEON 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ciextern struct xor_block_template const xor_block_neon_inner; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_cistatic void 15662306a36Sopenharmony_cixor_neon_2(unsigned long bytes, unsigned long * __restrict p1, 15762306a36Sopenharmony_ci const unsigned long * __restrict p2) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci if (in_interrupt()) { 16062306a36Sopenharmony_ci xor_arm4regs_2(bytes, p1, p2); 16162306a36Sopenharmony_ci } else { 16262306a36Sopenharmony_ci kernel_neon_begin(); 16362306a36Sopenharmony_ci xor_block_neon_inner.do_2(bytes, p1, p2); 16462306a36Sopenharmony_ci kernel_neon_end(); 16562306a36Sopenharmony_ci } 16662306a36Sopenharmony_ci} 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_cistatic void 16962306a36Sopenharmony_cixor_neon_3(unsigned long bytes, unsigned long * __restrict p1, 17062306a36Sopenharmony_ci const unsigned long * __restrict p2, 17162306a36Sopenharmony_ci const unsigned long * __restrict p3) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci if (in_interrupt()) { 17462306a36Sopenharmony_ci xor_arm4regs_3(bytes, p1, p2, p3); 17562306a36Sopenharmony_ci } else { 17662306a36Sopenharmony_ci kernel_neon_begin(); 17762306a36Sopenharmony_ci xor_block_neon_inner.do_3(bytes, p1, p2, p3); 17862306a36Sopenharmony_ci kernel_neon_end(); 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_cistatic void 18362306a36Sopenharmony_cixor_neon_4(unsigned long bytes, unsigned long * __restrict p1, 18462306a36Sopenharmony_ci const unsigned long * __restrict p2, 18562306a36Sopenharmony_ci const unsigned long * __restrict p3, 18662306a36Sopenharmony_ci const unsigned long * __restrict p4) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci if (in_interrupt()) { 18962306a36Sopenharmony_ci xor_arm4regs_4(bytes, p1, p2, p3, p4); 19062306a36Sopenharmony_ci } else { 19162306a36Sopenharmony_ci kernel_neon_begin(); 19262306a36Sopenharmony_ci xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); 19362306a36Sopenharmony_ci kernel_neon_end(); 19462306a36Sopenharmony_ci } 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_cistatic void 19862306a36Sopenharmony_cixor_neon_5(unsigned long bytes, unsigned long * __restrict p1, 19962306a36Sopenharmony_ci const unsigned long * __restrict p2, 20062306a36Sopenharmony_ci const unsigned long * __restrict p3, 20162306a36Sopenharmony_ci const unsigned long * __restrict p4, 20262306a36Sopenharmony_ci const unsigned long * __restrict p5) 20362306a36Sopenharmony_ci{ 20462306a36Sopenharmony_ci if (in_interrupt()) { 20562306a36Sopenharmony_ci xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); 20662306a36Sopenharmony_ci } else { 20762306a36Sopenharmony_ci kernel_neon_begin(); 20862306a36Sopenharmony_ci xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); 20962306a36Sopenharmony_ci kernel_neon_end(); 21062306a36Sopenharmony_ci } 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_cistatic struct xor_block_template xor_block_neon = { 21462306a36Sopenharmony_ci .name = "neon", 21562306a36Sopenharmony_ci .do_2 = xor_neon_2, 21662306a36Sopenharmony_ci .do_3 = xor_neon_3, 21762306a36Sopenharmony_ci .do_4 = xor_neon_4, 21862306a36Sopenharmony_ci .do_5 = xor_neon_5 21962306a36Sopenharmony_ci}; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci#define NEON_TEMPLATES \ 22262306a36Sopenharmony_ci do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0) 22362306a36Sopenharmony_ci#else 22462306a36Sopenharmony_ci#define NEON_TEMPLATES 22562306a36Sopenharmony_ci#endif 226