162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) IBM Corporation, 2012 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Author: Anton Blanchard <anton@au.ibm.com> 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci/* 1062306a36Sopenharmony_ci * Sparse (as at v0.5.0) gets very, very confused by this file. 1162306a36Sopenharmony_ci * Make it a bit simpler for it. 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci#if !defined(__CHECKER__) 1462306a36Sopenharmony_ci#include <altivec.h> 1562306a36Sopenharmony_ci#else 1662306a36Sopenharmony_ci#define vec_xor(a, b) a ^ b 1762306a36Sopenharmony_ci#define vector __attribute__((vector_size(16))) 1862306a36Sopenharmony_ci#endif 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include "xor_vmx.h" 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_citypedef vector signed char unative_t; 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#define DEFINE(V) \ 2562306a36Sopenharmony_ci unative_t *V = (unative_t *)V##_in; \ 2662306a36Sopenharmony_ci unative_t V##_0, V##_1, V##_2, V##_3 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci#define LOAD(V) \ 2962306a36Sopenharmony_ci do { \ 3062306a36Sopenharmony_ci V##_0 = V[0]; \ 3162306a36Sopenharmony_ci V##_1 = V[1]; \ 3262306a36Sopenharmony_ci V##_2 = V[2]; \ 3362306a36Sopenharmony_ci V##_3 = V[3]; \ 3462306a36Sopenharmony_ci } while (0) 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci#define STORE(V) \ 3762306a36Sopenharmony_ci do { \ 3862306a36Sopenharmony_ci V[0] = V##_0; \ 3962306a36Sopenharmony_ci V[1] = V##_1; \ 4062306a36Sopenharmony_ci V[2] = V##_2; \ 4162306a36Sopenharmony_ci V[3] = V##_3; \ 4262306a36Sopenharmony_ci } while (0) 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci#define XOR(V1, V2) \ 4562306a36Sopenharmony_ci do { \ 4662306a36Sopenharmony_ci V1##_0 = vec_xor(V1##_0, V2##_0); \ 4762306a36Sopenharmony_ci V1##_1 = vec_xor(V1##_1, V2##_1); \ 4862306a36Sopenharmony_ci V1##_2 = vec_xor(V1##_2, V2##_2); \ 4962306a36Sopenharmony_ci V1##_3 = vec_xor(V1##_3, V2##_3); \ 5062306a36Sopenharmony_ci } while (0) 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_civoid __xor_altivec_2(unsigned long bytes, 5362306a36Sopenharmony_ci unsigned long * __restrict v1_in, 5462306a36Sopenharmony_ci const unsigned long * __restrict v2_in) 5562306a36Sopenharmony_ci{ 5662306a36Sopenharmony_ci DEFINE(v1); 5762306a36Sopenharmony_ci DEFINE(v2); 5862306a36Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci do { 6162306a36Sopenharmony_ci LOAD(v1); 6262306a36Sopenharmony_ci LOAD(v2); 6362306a36Sopenharmony_ci XOR(v1, v2); 6462306a36Sopenharmony_ci STORE(v1); 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci v1 += 4; 6762306a36Sopenharmony_ci v2 += 4; 6862306a36Sopenharmony_ci } while (--lines > 0); 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_civoid __xor_altivec_3(unsigned long bytes, 7262306a36Sopenharmony_ci unsigned long * __restrict v1_in, 7362306a36Sopenharmony_ci const unsigned long * __restrict v2_in, 7462306a36Sopenharmony_ci const unsigned long * __restrict v3_in) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci DEFINE(v1); 7762306a36Sopenharmony_ci DEFINE(v2); 7862306a36Sopenharmony_ci DEFINE(v3); 7962306a36Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci do { 8262306a36Sopenharmony_ci LOAD(v1); 8362306a36Sopenharmony_ci LOAD(v2); 8462306a36Sopenharmony_ci LOAD(v3); 8562306a36Sopenharmony_ci XOR(v1, v2); 8662306a36Sopenharmony_ci XOR(v1, v3); 8762306a36Sopenharmony_ci STORE(v1); 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci v1 += 4; 9062306a36Sopenharmony_ci v2 += 4; 9162306a36Sopenharmony_ci v3 += 4; 9262306a36Sopenharmony_ci } while (--lines > 0); 9362306a36Sopenharmony_ci} 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_civoid __xor_altivec_4(unsigned long bytes, 9662306a36Sopenharmony_ci unsigned long * __restrict v1_in, 9762306a36Sopenharmony_ci const unsigned long * __restrict v2_in, 9862306a36Sopenharmony_ci const unsigned long * __restrict v3_in, 9962306a36Sopenharmony_ci const unsigned long * __restrict v4_in) 10062306a36Sopenharmony_ci{ 10162306a36Sopenharmony_ci DEFINE(v1); 10262306a36Sopenharmony_ci DEFINE(v2); 10362306a36Sopenharmony_ci DEFINE(v3); 10462306a36Sopenharmony_ci DEFINE(v4); 10562306a36Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci do { 10862306a36Sopenharmony_ci LOAD(v1); 10962306a36Sopenharmony_ci LOAD(v2); 11062306a36Sopenharmony_ci LOAD(v3); 11162306a36Sopenharmony_ci LOAD(v4); 11262306a36Sopenharmony_ci XOR(v1, v2); 11362306a36Sopenharmony_ci XOR(v3, v4); 11462306a36Sopenharmony_ci XOR(v1, v3); 11562306a36Sopenharmony_ci STORE(v1); 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci v1 += 4; 11862306a36Sopenharmony_ci v2 += 4; 11962306a36Sopenharmony_ci v3 += 4; 12062306a36Sopenharmony_ci v4 += 4; 12162306a36Sopenharmony_ci } while (--lines > 0); 12262306a36Sopenharmony_ci} 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_civoid __xor_altivec_5(unsigned long bytes, 12562306a36Sopenharmony_ci unsigned long * __restrict v1_in, 12662306a36Sopenharmony_ci const unsigned long * __restrict v2_in, 12762306a36Sopenharmony_ci const unsigned long * __restrict v3_in, 12862306a36Sopenharmony_ci const unsigned long * __restrict v4_in, 12962306a36Sopenharmony_ci const unsigned long * __restrict v5_in) 13062306a36Sopenharmony_ci{ 13162306a36Sopenharmony_ci DEFINE(v1); 13262306a36Sopenharmony_ci DEFINE(v2); 13362306a36Sopenharmony_ci DEFINE(v3); 13462306a36Sopenharmony_ci DEFINE(v4); 13562306a36Sopenharmony_ci DEFINE(v5); 13662306a36Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci do { 13962306a36Sopenharmony_ci LOAD(v1); 14062306a36Sopenharmony_ci LOAD(v2); 14162306a36Sopenharmony_ci LOAD(v3); 14262306a36Sopenharmony_ci LOAD(v4); 14362306a36Sopenharmony_ci LOAD(v5); 14462306a36Sopenharmony_ci XOR(v1, v2); 14562306a36Sopenharmony_ci XOR(v3, v4); 14662306a36Sopenharmony_ci XOR(v1, v5); 14762306a36Sopenharmony_ci XOR(v1, v3); 14862306a36Sopenharmony_ci STORE(v1); 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci v1 += 4; 15162306a36Sopenharmony_ci v2 += 4; 15262306a36Sopenharmony_ci v3 += 4; 15362306a36Sopenharmony_ci v4 += 4; 15462306a36Sopenharmony_ci v5 += 4; 15562306a36Sopenharmony_ci } while (--lines > 0); 15662306a36Sopenharmony_ci} 157