18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Copyright (C) IBM Corporation, 2012
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Author: Anton Blanchard <anton@au.ibm.com>
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci/*
108c2ecf20Sopenharmony_ci * Sparse (as at v0.5.0) gets very, very confused by this file.
118c2ecf20Sopenharmony_ci * Make it a bit simpler for it.
128c2ecf20Sopenharmony_ci */
138c2ecf20Sopenharmony_ci#if !defined(__CHECKER__)
148c2ecf20Sopenharmony_ci#include <altivec.h>
158c2ecf20Sopenharmony_ci#else
168c2ecf20Sopenharmony_ci#define vec_xor(a, b) a ^ b
178c2ecf20Sopenharmony_ci#define vector __attribute__((vector_size(16)))
188c2ecf20Sopenharmony_ci#endif
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci#include "xor_vmx.h"
218c2ecf20Sopenharmony_ci
/*
 * The unit every helper in this file loads, XORs and stores: one vector of
 * signed chars. sizeof(unative_t) is what the routines below use to turn a
 * byte count into a number of vector chunks.
 */
typedef vector signed char unative_t;
238c2ecf20Sopenharmony_ci
/*
 * DEFINE(V) - declare the working set for one buffer.
 *
 * Expects a variable named V_in to be in scope. Declares:
 *   V            - V_in reinterpreted as a pointer to unative_t
 *   V_0 .. V_3   - four vector temporaries used by LOAD/XOR/STORE
 *
 * The expansion deliberately has no trailing semicolon; the caller supplies
 * it. Because it expands to declarations, it must appear where declarations
 * are allowed.
 */
#define DEFINE(V)				\
	unative_t *V = (unative_t *)V##_in;	\
	unative_t V##_0, V##_1, V##_2, V##_3
278c2ecf20Sopenharmony_ci
/*
 * LOAD(V) - copy the four consecutive vectors V[0..3] into the temporaries
 * V_0..V_3. Requires that V and V_0..V_3 were declared with DEFINE(V).
 */
#define LOAD(V)			\
	do {			\
		V##_0 = V[0];	\
		V##_1 = V[1];	\
		V##_2 = V[2];	\
		V##_3 = V[3];	\
	} while (0)
358c2ecf20Sopenharmony_ci
/*
 * STORE(V) - write the temporaries V_0..V_3 back to the four consecutive
 * vectors V[0..3]. Requires that V and V_0..V_3 were declared with DEFINE(V).
 */
#define STORE(V)		\
	do {			\
		V[0] = V##_0;	\
		V[1] = V##_1;	\
		V[2] = V##_2;	\
		V[3] = V##_3;	\
	} while (0)
438c2ecf20Sopenharmony_ci
/*
 * XOR(V1, V2) - for each of the four temporaries, V1_n ^= V2_n via vec_xor().
 * Only the V1 temporaries are modified; nothing is written to memory until a
 * later STORE().
 */
#define XOR(V1, V2)					\
	do {						\
		V1##_0 = vec_xor(V1##_0, V2##_0);	\
		V1##_1 = vec_xor(V1##_1, V2##_1);	\
		V1##_2 = vec_xor(V1##_2, V2##_2);	\
		V1##_3 = vec_xor(V1##_3, V2##_3);	\
	} while (0)
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_civoid __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
538c2ecf20Sopenharmony_ci		     unsigned long *v2_in)
548c2ecf20Sopenharmony_ci{
558c2ecf20Sopenharmony_ci	DEFINE(v1);
568c2ecf20Sopenharmony_ci	DEFINE(v2);
578c2ecf20Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	do {
608c2ecf20Sopenharmony_ci		LOAD(v1);
618c2ecf20Sopenharmony_ci		LOAD(v2);
628c2ecf20Sopenharmony_ci		XOR(v1, v2);
638c2ecf20Sopenharmony_ci		STORE(v1);
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci		v1 += 4;
668c2ecf20Sopenharmony_ci		v2 += 4;
678c2ecf20Sopenharmony_ci	} while (--lines > 0);
688c2ecf20Sopenharmony_ci}
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_civoid __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
718c2ecf20Sopenharmony_ci		     unsigned long *v2_in, unsigned long *v3_in)
728c2ecf20Sopenharmony_ci{
738c2ecf20Sopenharmony_ci	DEFINE(v1);
748c2ecf20Sopenharmony_ci	DEFINE(v2);
758c2ecf20Sopenharmony_ci	DEFINE(v3);
768c2ecf20Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	do {
798c2ecf20Sopenharmony_ci		LOAD(v1);
808c2ecf20Sopenharmony_ci		LOAD(v2);
818c2ecf20Sopenharmony_ci		LOAD(v3);
828c2ecf20Sopenharmony_ci		XOR(v1, v2);
838c2ecf20Sopenharmony_ci		XOR(v1, v3);
848c2ecf20Sopenharmony_ci		STORE(v1);
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci		v1 += 4;
878c2ecf20Sopenharmony_ci		v2 += 4;
888c2ecf20Sopenharmony_ci		v3 += 4;
898c2ecf20Sopenharmony_ci	} while (--lines > 0);
908c2ecf20Sopenharmony_ci}
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_civoid __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
938c2ecf20Sopenharmony_ci		     unsigned long *v2_in, unsigned long *v3_in,
948c2ecf20Sopenharmony_ci		     unsigned long *v4_in)
958c2ecf20Sopenharmony_ci{
968c2ecf20Sopenharmony_ci	DEFINE(v1);
978c2ecf20Sopenharmony_ci	DEFINE(v2);
988c2ecf20Sopenharmony_ci	DEFINE(v3);
998c2ecf20Sopenharmony_ci	DEFINE(v4);
1008c2ecf20Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	do {
1038c2ecf20Sopenharmony_ci		LOAD(v1);
1048c2ecf20Sopenharmony_ci		LOAD(v2);
1058c2ecf20Sopenharmony_ci		LOAD(v3);
1068c2ecf20Sopenharmony_ci		LOAD(v4);
1078c2ecf20Sopenharmony_ci		XOR(v1, v2);
1088c2ecf20Sopenharmony_ci		XOR(v3, v4);
1098c2ecf20Sopenharmony_ci		XOR(v1, v3);
1108c2ecf20Sopenharmony_ci		STORE(v1);
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci		v1 += 4;
1138c2ecf20Sopenharmony_ci		v2 += 4;
1148c2ecf20Sopenharmony_ci		v3 += 4;
1158c2ecf20Sopenharmony_ci		v4 += 4;
1168c2ecf20Sopenharmony_ci	} while (--lines > 0);
1178c2ecf20Sopenharmony_ci}
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_civoid __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
1208c2ecf20Sopenharmony_ci		     unsigned long *v2_in, unsigned long *v3_in,
1218c2ecf20Sopenharmony_ci		     unsigned long *v4_in, unsigned long *v5_in)
1228c2ecf20Sopenharmony_ci{
1238c2ecf20Sopenharmony_ci	DEFINE(v1);
1248c2ecf20Sopenharmony_ci	DEFINE(v2);
1258c2ecf20Sopenharmony_ci	DEFINE(v3);
1268c2ecf20Sopenharmony_ci	DEFINE(v4);
1278c2ecf20Sopenharmony_ci	DEFINE(v5);
1288c2ecf20Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	do {
1318c2ecf20Sopenharmony_ci		LOAD(v1);
1328c2ecf20Sopenharmony_ci		LOAD(v2);
1338c2ecf20Sopenharmony_ci		LOAD(v3);
1348c2ecf20Sopenharmony_ci		LOAD(v4);
1358c2ecf20Sopenharmony_ci		LOAD(v5);
1368c2ecf20Sopenharmony_ci		XOR(v1, v2);
1378c2ecf20Sopenharmony_ci		XOR(v3, v4);
1388c2ecf20Sopenharmony_ci		XOR(v1, v5);
1398c2ecf20Sopenharmony_ci		XOR(v1, v3);
1408c2ecf20Sopenharmony_ci		STORE(v1);
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci		v1 += 4;
1438c2ecf20Sopenharmony_ci		v2 += 4;
1448c2ecf20Sopenharmony_ci		v3 += 4;
1458c2ecf20Sopenharmony_ci		v4 += 4;
1468c2ecf20Sopenharmony_ci		v5 += 4;
1478c2ecf20Sopenharmony_ci	} while (--lines > 0);
1488c2ecf20Sopenharmony_ci}
149