162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * Copyright (C) IBM Corporation, 2012
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Author: Anton Blanchard <anton@au.ibm.com>
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci/*
1062306a36Sopenharmony_ci * Sparse (as at v0.5.0) gets very, very confused by this file.
1162306a36Sopenharmony_ci * Make it a bit simpler for it.
1262306a36Sopenharmony_ci */
1362306a36Sopenharmony_ci#if !defined(__CHECKER__)
1462306a36Sopenharmony_ci#include <altivec.h>
1562306a36Sopenharmony_ci#else
1662306a36Sopenharmony_ci#define vec_xor(a, b) a ^ b
1762306a36Sopenharmony_ci#define vector __attribute__((vector_size(16)))
1862306a36Sopenharmony_ci#endif
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include "xor_vmx.h"
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_citypedef vector signed char unative_t;
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci#define DEFINE(V)				\
2562306a36Sopenharmony_ci	unative_t *V = (unative_t *)V##_in;	\
2662306a36Sopenharmony_ci	unative_t V##_0, V##_1, V##_2, V##_3
2762306a36Sopenharmony_ci
/*
 * LOAD(BUF) - copy the four elements BUF[0] .. BUF[3] into the
 * temporaries BUF_0 .. BUF_3.
 *
 * BUF must be a plain identifier because it is also token-pasted to
 * form the names of the temporaries.
 */
#define LOAD(BUF)			\
	do {				\
		BUF##_0 = (BUF)[0];	\
		BUF##_1 = (BUF)[1];	\
		BUF##_2 = (BUF)[2];	\
		BUF##_3 = (BUF)[3];	\
	} while (0)
3562306a36Sopenharmony_ci
/*
 * STORE(BUF) - write the temporaries BUF_0 .. BUF_3 back to the four
 * elements BUF[0] .. BUF[3].
 *
 * BUF must be a plain identifier because it is also token-pasted to
 * form the names of the temporaries.
 */
#define STORE(BUF)			\
	do {				\
		(BUF)[0] = BUF##_0;	\
		(BUF)[1] = BUF##_1;	\
		(BUF)[2] = BUF##_2;	\
		(BUF)[3] = BUF##_3;	\
	} while (0)
4362306a36Sopenharmony_ci
/*
 * XOR(DST, SRC) - fold the temporaries of SRC into those of DST:
 * DST_i = vec_xor(DST_i, SRC_i) for i = 0 .. 3.
 *
 * Only the DST_* temporaries are assigned; SRC_* are read only.  Both
 * arguments must be plain identifiers since they are token-pasted.
 */
#define XOR(DST, SRC)						\
	do {							\
		DST##_0 = vec_xor(DST##_0, SRC##_0);		\
		DST##_1 = vec_xor(DST##_1, SRC##_1);		\
		DST##_2 = vec_xor(DST##_2, SRC##_2);		\
		DST##_3 = vec_xor(DST##_3, SRC##_3);		\
	} while (0)
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_civoid __xor_altivec_2(unsigned long bytes,
5362306a36Sopenharmony_ci		     unsigned long * __restrict v1_in,
5462306a36Sopenharmony_ci		     const unsigned long * __restrict v2_in)
5562306a36Sopenharmony_ci{
5662306a36Sopenharmony_ci	DEFINE(v1);
5762306a36Sopenharmony_ci	DEFINE(v2);
5862306a36Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	do {
6162306a36Sopenharmony_ci		LOAD(v1);
6262306a36Sopenharmony_ci		LOAD(v2);
6362306a36Sopenharmony_ci		XOR(v1, v2);
6462306a36Sopenharmony_ci		STORE(v1);
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci		v1 += 4;
6762306a36Sopenharmony_ci		v2 += 4;
6862306a36Sopenharmony_ci	} while (--lines > 0);
6962306a36Sopenharmony_ci}
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_civoid __xor_altivec_3(unsigned long bytes,
7262306a36Sopenharmony_ci		     unsigned long * __restrict v1_in,
7362306a36Sopenharmony_ci		     const unsigned long * __restrict v2_in,
7462306a36Sopenharmony_ci		     const unsigned long * __restrict v3_in)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	DEFINE(v1);
7762306a36Sopenharmony_ci	DEFINE(v2);
7862306a36Sopenharmony_ci	DEFINE(v3);
7962306a36Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci	do {
8262306a36Sopenharmony_ci		LOAD(v1);
8362306a36Sopenharmony_ci		LOAD(v2);
8462306a36Sopenharmony_ci		LOAD(v3);
8562306a36Sopenharmony_ci		XOR(v1, v2);
8662306a36Sopenharmony_ci		XOR(v1, v3);
8762306a36Sopenharmony_ci		STORE(v1);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci		v1 += 4;
9062306a36Sopenharmony_ci		v2 += 4;
9162306a36Sopenharmony_ci		v3 += 4;
9262306a36Sopenharmony_ci	} while (--lines > 0);
9362306a36Sopenharmony_ci}
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_civoid __xor_altivec_4(unsigned long bytes,
9662306a36Sopenharmony_ci		     unsigned long * __restrict v1_in,
9762306a36Sopenharmony_ci		     const unsigned long * __restrict v2_in,
9862306a36Sopenharmony_ci		     const unsigned long * __restrict v3_in,
9962306a36Sopenharmony_ci		     const unsigned long * __restrict v4_in)
10062306a36Sopenharmony_ci{
10162306a36Sopenharmony_ci	DEFINE(v1);
10262306a36Sopenharmony_ci	DEFINE(v2);
10362306a36Sopenharmony_ci	DEFINE(v3);
10462306a36Sopenharmony_ci	DEFINE(v4);
10562306a36Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	do {
10862306a36Sopenharmony_ci		LOAD(v1);
10962306a36Sopenharmony_ci		LOAD(v2);
11062306a36Sopenharmony_ci		LOAD(v3);
11162306a36Sopenharmony_ci		LOAD(v4);
11262306a36Sopenharmony_ci		XOR(v1, v2);
11362306a36Sopenharmony_ci		XOR(v3, v4);
11462306a36Sopenharmony_ci		XOR(v1, v3);
11562306a36Sopenharmony_ci		STORE(v1);
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci		v1 += 4;
11862306a36Sopenharmony_ci		v2 += 4;
11962306a36Sopenharmony_ci		v3 += 4;
12062306a36Sopenharmony_ci		v4 += 4;
12162306a36Sopenharmony_ci	} while (--lines > 0);
12262306a36Sopenharmony_ci}
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_civoid __xor_altivec_5(unsigned long bytes,
12562306a36Sopenharmony_ci		     unsigned long * __restrict v1_in,
12662306a36Sopenharmony_ci		     const unsigned long * __restrict v2_in,
12762306a36Sopenharmony_ci		     const unsigned long * __restrict v3_in,
12862306a36Sopenharmony_ci		     const unsigned long * __restrict v4_in,
12962306a36Sopenharmony_ci		     const unsigned long * __restrict v5_in)
13062306a36Sopenharmony_ci{
13162306a36Sopenharmony_ci	DEFINE(v1);
13262306a36Sopenharmony_ci	DEFINE(v2);
13362306a36Sopenharmony_ci	DEFINE(v3);
13462306a36Sopenharmony_ci	DEFINE(v4);
13562306a36Sopenharmony_ci	DEFINE(v5);
13662306a36Sopenharmony_ci	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	do {
13962306a36Sopenharmony_ci		LOAD(v1);
14062306a36Sopenharmony_ci		LOAD(v2);
14162306a36Sopenharmony_ci		LOAD(v3);
14262306a36Sopenharmony_ci		LOAD(v4);
14362306a36Sopenharmony_ci		LOAD(v5);
14462306a36Sopenharmony_ci		XOR(v1, v2);
14562306a36Sopenharmony_ci		XOR(v3, v4);
14662306a36Sopenharmony_ci		XOR(v1, v5);
14762306a36Sopenharmony_ci		XOR(v1, v3);
14862306a36Sopenharmony_ci		STORE(v1);
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci		v1 += 4;
15162306a36Sopenharmony_ci		v2 += 4;
15262306a36Sopenharmony_ci		v3 += 4;
15362306a36Sopenharmony_ci		v4 += 4;
15462306a36Sopenharmony_ci		v5 += 4;
15562306a36Sopenharmony_ci	} while (--lines > 0);
15662306a36Sopenharmony_ci}
157