18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Copyright (C) IBM Corporation, 2012 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Author: Anton Blanchard <anton@au.ibm.com> 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci/* 108c2ecf20Sopenharmony_ci * Sparse (as at v0.5.0) gets very, very confused by this file. 118c2ecf20Sopenharmony_ci * Make it a bit simpler for it. 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci#if !defined(__CHECKER__) 148c2ecf20Sopenharmony_ci#include <altivec.h> 158c2ecf20Sopenharmony_ci#else 168c2ecf20Sopenharmony_ci#define vec_xor(a, b) a ^ b 178c2ecf20Sopenharmony_ci#define vector __attribute__((vector_size(16))) 188c2ecf20Sopenharmony_ci#endif 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ci#include "xor_vmx.h" 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_citypedef vector signed char unative_t; 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci#define DEFINE(V) \ 258c2ecf20Sopenharmony_ci unative_t *V = (unative_t *)V##_in; \ 268c2ecf20Sopenharmony_ci unative_t V##_0, V##_1, V##_2, V##_3 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci#define LOAD(V) \ 298c2ecf20Sopenharmony_ci do { \ 308c2ecf20Sopenharmony_ci V##_0 = V[0]; \ 318c2ecf20Sopenharmony_ci V##_1 = V[1]; \ 328c2ecf20Sopenharmony_ci V##_2 = V[2]; \ 338c2ecf20Sopenharmony_ci V##_3 = V[3]; \ 348c2ecf20Sopenharmony_ci } while (0) 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci#define STORE(V) \ 378c2ecf20Sopenharmony_ci do { \ 388c2ecf20Sopenharmony_ci V[0] = V##_0; \ 398c2ecf20Sopenharmony_ci V[1] = V##_1; \ 408c2ecf20Sopenharmony_ci V[2] = V##_2; \ 418c2ecf20Sopenharmony_ci V[3] = V##_3; \ 428c2ecf20Sopenharmony_ci } while (0) 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci#define XOR(V1, V2) \ 458c2ecf20Sopenharmony_ci do { \ 468c2ecf20Sopenharmony_ci V1##_0 = vec_xor(V1##_0, V2##_0); \ 478c2ecf20Sopenharmony_ci V1##_1 = vec_xor(V1##_1, V2##_1); \ 488c2ecf20Sopenharmony_ci V1##_2 = vec_xor(V1##_2, V2##_2); \ 498c2ecf20Sopenharmony_ci V1##_3 = vec_xor(V1##_3, V2##_3); \ 508c2ecf20Sopenharmony_ci } while (0) 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_civoid __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, 538c2ecf20Sopenharmony_ci unsigned long *v2_in) 548c2ecf20Sopenharmony_ci{ 558c2ecf20Sopenharmony_ci DEFINE(v1); 568c2ecf20Sopenharmony_ci DEFINE(v2); 578c2ecf20Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci do { 608c2ecf20Sopenharmony_ci LOAD(v1); 618c2ecf20Sopenharmony_ci LOAD(v2); 628c2ecf20Sopenharmony_ci XOR(v1, v2); 638c2ecf20Sopenharmony_ci STORE(v1); 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci v1 += 4; 668c2ecf20Sopenharmony_ci v2 += 4; 678c2ecf20Sopenharmony_ci } while (--lines > 0); 688c2ecf20Sopenharmony_ci} 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_civoid __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, 718c2ecf20Sopenharmony_ci unsigned long *v2_in, unsigned long *v3_in) 728c2ecf20Sopenharmony_ci{ 738c2ecf20Sopenharmony_ci DEFINE(v1); 748c2ecf20Sopenharmony_ci DEFINE(v2); 758c2ecf20Sopenharmony_ci DEFINE(v3); 768c2ecf20Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci do { 798c2ecf20Sopenharmony_ci LOAD(v1); 808c2ecf20Sopenharmony_ci LOAD(v2); 818c2ecf20Sopenharmony_ci LOAD(v3); 828c2ecf20Sopenharmony_ci XOR(v1, v2); 838c2ecf20Sopenharmony_ci XOR(v1, v3); 848c2ecf20Sopenharmony_ci STORE(v1); 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci v1 += 4; 878c2ecf20Sopenharmony_ci v2 += 4; 888c2ecf20Sopenharmony_ci v3 += 4; 898c2ecf20Sopenharmony_ci } while (--lines > 0); 908c2ecf20Sopenharmony_ci} 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_civoid __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, 938c2ecf20Sopenharmony_ci unsigned long *v2_in, unsigned long *v3_in, 948c2ecf20Sopenharmony_ci unsigned long *v4_in) 958c2ecf20Sopenharmony_ci{ 968c2ecf20Sopenharmony_ci DEFINE(v1); 978c2ecf20Sopenharmony_ci DEFINE(v2); 988c2ecf20Sopenharmony_ci DEFINE(v3); 998c2ecf20Sopenharmony_ci DEFINE(v4); 1008c2ecf20Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci do { 1038c2ecf20Sopenharmony_ci LOAD(v1); 1048c2ecf20Sopenharmony_ci LOAD(v2); 1058c2ecf20Sopenharmony_ci LOAD(v3); 1068c2ecf20Sopenharmony_ci LOAD(v4); 1078c2ecf20Sopenharmony_ci XOR(v1, v2); 1088c2ecf20Sopenharmony_ci XOR(v3, v4); 1098c2ecf20Sopenharmony_ci XOR(v1, v3); 1108c2ecf20Sopenharmony_ci STORE(v1); 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci v1 += 4; 1138c2ecf20Sopenharmony_ci v2 += 4; 1148c2ecf20Sopenharmony_ci v3 += 4; 1158c2ecf20Sopenharmony_ci v4 += 4; 1168c2ecf20Sopenharmony_ci } while (--lines > 0); 1178c2ecf20Sopenharmony_ci} 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_civoid __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, 1208c2ecf20Sopenharmony_ci unsigned long *v2_in, unsigned long *v3_in, 1218c2ecf20Sopenharmony_ci unsigned long *v4_in, unsigned long *v5_in) 1228c2ecf20Sopenharmony_ci{ 1238c2ecf20Sopenharmony_ci DEFINE(v1); 1248c2ecf20Sopenharmony_ci DEFINE(v2); 1258c2ecf20Sopenharmony_ci DEFINE(v3); 1268c2ecf20Sopenharmony_ci DEFINE(v4); 1278c2ecf20Sopenharmony_ci DEFINE(v5); 1288c2ecf20Sopenharmony_ci unsigned long lines = bytes / (sizeof(unative_t)) / 4; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci do { 1318c2ecf20Sopenharmony_ci LOAD(v1); 1328c2ecf20Sopenharmony_ci LOAD(v2); 1338c2ecf20Sopenharmony_ci LOAD(v3); 1348c2ecf20Sopenharmony_ci LOAD(v4); 1358c2ecf20Sopenharmony_ci LOAD(v5); 1368c2ecf20Sopenharmony_ci XOR(v1, v2); 1378c2ecf20Sopenharmony_ci XOR(v3, v4); 1388c2ecf20Sopenharmony_ci XOR(v1, v5); 1398c2ecf20Sopenharmony_ci XOR(v1, v3); 1408c2ecf20Sopenharmony_ci STORE(v1); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci v1 += 4; 1438c2ecf20Sopenharmony_ci v2 += 4; 1448c2ecf20Sopenharmony_ci v3 += 4; 1458c2ecf20Sopenharmony_ci v4 += 4; 1468c2ecf20Sopenharmony_ci v5 += 4; 1478c2ecf20Sopenharmony_ci } while (--lines > 0); 1488c2ecf20Sopenharmony_ci} 149