/*
 * Copyright 2015 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Oded Gabbay <oded.gabbay@redhat.com>
 */

/**
 * @file
 * POWER8 intrinsics portability header.
 *
 * SSE-style helpers (an __m128i vector type plus functions modelled on the
 * _mm_* intrinsics) implemented with VMX/VSX operations for little-endian
 * POWER8. The including code is expected to provide the ubyte/ushort/uint
 * typedefs and the UTIL_ARCH_LITTLE_ENDIAN macro used below.
 */

#ifndef U_PWR8_H_
#define U_PWR8_H_

#if defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN

#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))

typedef VECTOR_ALIGN_16 vector unsigned char __m128i;

typedef VECTOR_ALIGN_16 union m128i {
   __m128i m128i;
   vector signed int m128si;
   vector unsigned int m128ui;
   ubyte ub[16];
   ushort us[8];
   int i[4];
   uint ui[4];
} __m128i_union;

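/* Build a vector from four 32-bit integers; arguments follow SSE2
 * _mm_set_epi32 order, i.e. i0 lands in element 0 and i3 in element 3.
 */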
static inline __m128i
vec_set_epi32 (int i3, int i2, int i1, int i0)
{
   __m128i_union vdst;

#if UTIL_ARCH_LITTLE_ENDIAN
   vdst.i[0] = i0;
   vdst.i[1] = i1;
   vdst.i[2] = i2;
   vdst.i[3] = i3;
#else
   vdst.i[3] = i0;
   vdst.i[2] = i1;
   vdst.i[1] = i2;
   vdst.i[0] = i3;
#endif

   return (__m128i) vdst.m128si;
}

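/* Like vec_set_epi32 but with the arguments in reversed (memory) order,
 * mirroring _mm_setr_epi32.
 */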
static inline __m128i
vec_setr_epi32 (int i0, int i1, int i2, int i3)
{
   return vec_set_epi32 (i3, i2, i1, i0);
}

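/* Interleave the two low 32-bit elements of 'even' and 'odd'
 * (_mm_unpacklo_epi32 semantics).
 */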
static inline __m128i
vec_unpacklo_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#if UTIL_ARCH_LITTLE_ENDIAN
      { 0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23};
#else
      {24, 25, 26, 27,  8,  9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}

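/* Interleave the two high 32-bit elements of 'even' and 'odd'
 * (_mm_unpackhi_epi32 semantics).
 */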
static inline __m128i
vec_unpackhi_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#if UTIL_ARCH_LITTLE_ENDIAN
      { 8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
#else
      {16, 17, 18, 19,  0,  1,  2,  3, 20, 21, 22, 23,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}

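/* Concatenate the low 64-bit halves of 'even' and 'odd'
 * (_mm_unpacklo_epi64 semantics).
 */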
static inline __m128i
vec_unpacklo_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#if UTIL_ARCH_LITTLE_ENDIAN
      { 0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23};
#else
      {24, 25, 26, 27, 28, 29, 30, 31,  8,  9, 10, 11, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}

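/* Concatenate the high 64-bit halves of 'even' and 'odd'
 * (_mm_unpackhi_epi64 semantics).
 */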
static inline __m128i
vec_unpackhi_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#if UTIL_ARCH_LITTLE_ENDIAN
      { 8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
#else
      {16, 17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}

static inline __m128i
vec_add_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
}

static inline __m128i
vec_sub_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
}

/* Multiply the 32-bit elements and keep the low 32 bits of each product
 * (_mm_mullo_epi32 semantics).
 * Call this function ONLY on POWER8 and newer platforms: vmuluwm is an
 * ISA 2.07 instruction.
 */
static inline __m128i
vec_mullo_epi32 (__m128i a, __m128i b)
{
   __m128i v;

   __asm__(
           "vmuluwm %0, %1, %2   \n"
           : "=v" (v)
           : "v" (a), "v" (b)
           );

   return v;
}

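/* Bitwise ~a & b; note the operand order (_mm_andnot_si128 semantics). */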
static inline __m128i
vec_andnot_si128 (__m128i a, __m128i b)
{
   return vec_andc (b, a);
}

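/* Transpose the 4x4 matrix of 32-bit elements whose rows are *a..*d,
 * writing the transposed rows to *o..*r.
 */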
static inline void
transpose4_epi32(const __m128i * restrict a,
                 const __m128i * restrict b,
                 const __m128i * restrict c,
                 const __m128i * restrict d,
                 __m128i * restrict o,
                 __m128i * restrict p,
                 __m128i * restrict q,
                 __m128i * restrict r)
{
   __m128i t0 = vec_unpacklo_epi32(*a, *b);
   __m128i t1 = vec_unpacklo_epi32(*c, *d);
   __m128i t2 = vec_unpackhi_epi32(*a, *b);
   __m128i t3 = vec_unpackhi_epi32(*c, *d);

   *o = vec_unpacklo_epi64(t0, t1);
   *p = vec_unpackhi_epi64(t0, t1);
   *q = vec_unpacklo_epi64(t2, t3);
   *r = vec_unpackhi_epi64(t2, t3);
}

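/* Shift each 32-bit element of vsrc left by 'count' bits; counts of 32 or
 * more yield zero (_mm_slli_epi32 semantics).
 */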
static inline __m128i
vec_slli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
}

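/* Logical right shift of each 32-bit element by 'count' bits; counts of 32
 * or more yield zero (_mm_srli_epi32 semantics).
 */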
static inline __m128i
vec_srli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
}

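/* Arithmetic right shift of each 32-bit element by 'count' bits. Note that
 * counts of 32 or more return zero here, whereas _mm_srai_epi32 would fill
 * each element with its sign bit.
 */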
static inline __m128i
vec_srai_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);
   else if (count == 0)
      return vsrc;

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);
   return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
}

static inline __m128i
vec_cmpeq_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
}

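/* Unaligned 16-byte load. The big-endian path stitches two aligned loads
 * together with vec_lvsl/vec_perm.
 */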
static inline __m128i
vec_loadu_si128 (const uint32_t* src)
{
   __m128i_union vsrc;

#if UTIL_ARCH_LITTLE_ENDIAN

   vsrc.m128ui = *((vector unsigned int *) src);

#else

   __m128i vmask, tmp1, tmp2;

   vmask = vec_lvsl(0, src);

   tmp1 = (__m128i) vec_ld (0, src);
   tmp2 = (__m128i) vec_ld (15, src);
   vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);

#endif

   return vsrc.m128i;
}

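/* 16-byte load from a 16-byte-aligned address. */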
static inline __m128i
vec_load_si128 (const uint32_t* src)
{
   __m128i_union vsrc;

   vsrc.m128ui = *((vector unsigned int *) src);

   return vsrc.m128i;
}

static inline void
vec_store_si128 (uint32_t* dest, __m128i vdata)
{
   vec_st ((vector unsigned int) vdata, 0, dest);
}

/* Gather the most-significant bit of each byte into a 16-bit mask
 * (_mm_movemask_epi8 semantics).
 * Call this function ONLY on POWER8 and newer platforms: vgbbd is an
 * ISA 2.07 instruction.
 */
static inline int
vec_movemask_epi8 (__m128i vsrc)
{
   __m128i_union vtemp;
   int result;

   vtemp.m128i = vec_vgbbd(vsrc);

#if UTIL_ARCH_LITTLE_ENDIAN
   result = vtemp.ub[15] << 8 | vtemp.ub[7];
#else
   result = vtemp.ub[0] << 8 | vtemp.ub[8];
#endif

   return result;
}

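/* Pack the signed 16-bit elements of a and b into signed 8-bit elements with
 * saturation; 'a' supplies the low half of the result (_mm_packs_epi16
 * semantics).
 */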
static inline __m128i
vec_packs_epi16 (__m128i a, __m128i b)
{
#if UTIL_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed short) a,
                               (vector signed short) b);
#else
   return (__m128i) vec_packs ((vector signed short) b,
                               (vector signed short) a);
#endif
}

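/* Pack the signed 32-bit elements of a and b into signed 16-bit elements
 * with saturation; 'a' supplies the low half of the result (_mm_packs_epi32
 * semantics).
 */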
static inline __m128i
vec_packs_epi32 (__m128i a, __m128i b)
{
#if UTIL_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
#else
   return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
#endif
}

#endif /* _ARCH_PWR8 && UTIL_ARCH_LITTLE_ENDIAN */

#endif /* U_PWR8_H_ */