1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * This file is part of FFmpeg. 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 5cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 6cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 7cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 10cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 11cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12cabdff1aSopenharmony_ci * Lesser General Public License for more details. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 15cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 16cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17cabdff1aSopenharmony_ci */ 18cabdff1aSopenharmony_ci 19cabdff1aSopenharmony_ci/** 20cabdff1aSopenharmony_ci * @file 21cabdff1aSopenharmony_ci * Contains misc utility macros and inline functions 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#ifndef AVUTIL_PPC_UTIL_ALTIVEC_H 25cabdff1aSopenharmony_ci#define AVUTIL_PPC_UTIL_ALTIVEC_H 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#include <stdint.h> 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ci#include "config.h" 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_ci/*********************************************************************** 32cabdff1aSopenharmony_ci * Vector types 33cabdff1aSopenharmony_ci **********************************************************************/ 34cabdff1aSopenharmony_ci#define vec_u8 vector unsigned char 35cabdff1aSopenharmony_ci#define vec_s8 vector signed char 36cabdff1aSopenharmony_ci#define vec_u16 vector unsigned short 37cabdff1aSopenharmony_ci#define vec_s16 vector signed short 38cabdff1aSopenharmony_ci#define vec_u32 vector unsigned int 39cabdff1aSopenharmony_ci#define vec_s32 vector signed int 40cabdff1aSopenharmony_ci#define vec_f vector float 41cabdff1aSopenharmony_ci 42cabdff1aSopenharmony_ci/*********************************************************************** 43cabdff1aSopenharmony_ci * Null vector 44cabdff1aSopenharmony_ci **********************************************************************/ 45cabdff1aSopenharmony_ci#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 ) 46cabdff1aSopenharmony_ci 47cabdff1aSopenharmony_ci#define zero_u8v (vec_u8) zerov 48cabdff1aSopenharmony_ci#define zero_s8v (vec_s8) zerov 49cabdff1aSopenharmony_ci#define zero_u16v (vec_u16) zerov 50cabdff1aSopenharmony_ci#define zero_s16v (vec_s16) zerov 51cabdff1aSopenharmony_ci#define zero_u32v (vec_u32) zerov 52cabdff1aSopenharmony_ci#define zero_s32v (vec_s32) zerov 53cabdff1aSopenharmony_ci 54cabdff1aSopenharmony_ci#if HAVE_ALTIVEC 55cabdff1aSopenharmony_ci#include <altivec.h> 56cabdff1aSopenharmony_ci 57cabdff1aSopenharmony_ci// used to build registers permutation vectors (vcprm) 58cabdff1aSopenharmony_ci// the 's' are for words in the _s_econd vector 59cabdff1aSopenharmony_ci#define WORD_0 0x00,0x01,0x02,0x03 60cabdff1aSopenharmony_ci#define WORD_1 0x04,0x05,0x06,0x07 61cabdff1aSopenharmony_ci#define WORD_2 0x08,0x09,0x0a,0x0b 62cabdff1aSopenharmony_ci#define WORD_3 0x0c,0x0d,0x0e,0x0f 63cabdff1aSopenharmony_ci#define WORD_s0 0x10,0x11,0x12,0x13 64cabdff1aSopenharmony_ci#define WORD_s1 0x14,0x15,0x16,0x17 65cabdff1aSopenharmony_ci#define WORD_s2 0x18,0x19,0x1a,0x1b 66cabdff1aSopenharmony_ci#define WORD_s3 0x1c,0x1d,0x1e,0x1f 67cabdff1aSopenharmony_ci#define vcprm(a,b,c,d) (const vec_u8){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} 68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_ci#define SWP_W2S0 0x02,0x03,0x00,0x01 70cabdff1aSopenharmony_ci#define SWP_W2S1 0x06,0x07,0x04,0x05 71cabdff1aSopenharmony_ci#define SWP_W2S2 0x0a,0x0b,0x08,0x09 72cabdff1aSopenharmony_ci#define SWP_W2S3 0x0e,0x0f,0x0c,0x0d 73cabdff1aSopenharmony_ci#define SWP_W2Ss0 0x12,0x13,0x10,0x11 74cabdff1aSopenharmony_ci#define SWP_W2Ss1 0x16,0x17,0x14,0x15 75cabdff1aSopenharmony_ci#define SWP_W2Ss2 0x1a,0x1b,0x18,0x19 76cabdff1aSopenharmony_ci#define SWP_W2Ss3 0x1e,0x1f,0x1c,0x1d 77cabdff1aSopenharmony_ci#define vcswapi2s(a,b,c,d) (const vector unsigned char){SWP_W2S ## a, SWP_W2S ## b, SWP_W2S ## c, SWP_W2S ## d} 78cabdff1aSopenharmony_ci 79cabdff1aSopenharmony_ci#define vcswapc() \ 80cabdff1aSopenharmony_ci (const vector unsigned char){0x0f,0x0e,0x0d,0x0c,0x0b,0x0a,0x09,0x08,0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00} 81cabdff1aSopenharmony_ci 82cabdff1aSopenharmony_ci 83cabdff1aSopenharmony_ci// Transpose 8x8 matrix of 16-bit elements (in-place) 84cabdff1aSopenharmony_ci#define TRANSPOSE8(a,b,c,d,e,f,g,h) \ 85cabdff1aSopenharmony_cido { \ 86cabdff1aSopenharmony_ci vec_s16 A1, B1, C1, D1, E1, F1, G1, H1; \ 87cabdff1aSopenharmony_ci vec_s16 A2, B2, C2, D2, E2, F2, G2, H2; \ 88cabdff1aSopenharmony_ci \ 89cabdff1aSopenharmony_ci A1 = vec_mergeh (a, e); \ 90cabdff1aSopenharmony_ci B1 = vec_mergel (a, e); \ 91cabdff1aSopenharmony_ci C1 = vec_mergeh (b, f); \ 92cabdff1aSopenharmony_ci D1 = vec_mergel (b, f); \ 93cabdff1aSopenharmony_ci E1 = vec_mergeh (c, g); \ 94cabdff1aSopenharmony_ci F1 = vec_mergel (c, g); \ 95cabdff1aSopenharmony_ci G1 = vec_mergeh (d, h); \ 96cabdff1aSopenharmony_ci H1 = vec_mergel (d, h); \ 97cabdff1aSopenharmony_ci \ 98cabdff1aSopenharmony_ci A2 = vec_mergeh (A1, E1); \ 99cabdff1aSopenharmony_ci B2 = vec_mergel (A1, E1); \ 100cabdff1aSopenharmony_ci C2 = vec_mergeh (B1, F1); \ 101cabdff1aSopenharmony_ci D2 = vec_mergel (B1, F1); \ 102cabdff1aSopenharmony_ci E2 = vec_mergeh (C1, G1); \ 103cabdff1aSopenharmony_ci F2 = vec_mergel (C1, G1); \ 104cabdff1aSopenharmony_ci G2 = vec_mergeh (D1, H1); \ 105cabdff1aSopenharmony_ci H2 = vec_mergel (D1, H1); \ 106cabdff1aSopenharmony_ci \ 107cabdff1aSopenharmony_ci a = vec_mergeh (A2, E2); \ 108cabdff1aSopenharmony_ci b = vec_mergel (A2, E2); \ 109cabdff1aSopenharmony_ci c = vec_mergeh (B2, F2); \ 110cabdff1aSopenharmony_ci d = vec_mergel (B2, F2); \ 111cabdff1aSopenharmony_ci e = vec_mergeh (C2, G2); \ 112cabdff1aSopenharmony_ci f = vec_mergel (C2, G2); \ 113cabdff1aSopenharmony_ci g = vec_mergeh (D2, H2); \ 114cabdff1aSopenharmony_ci h = vec_mergel (D2, H2); \ 115cabdff1aSopenharmony_ci} while (0) 116cabdff1aSopenharmony_ci 117cabdff1aSopenharmony_ci 118cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN 119cabdff1aSopenharmony_ci#define VEC_LD(offset,b) \ 120cabdff1aSopenharmony_ci vec_perm(vec_ld(offset, b), vec_ld((offset)+15, b), vec_lvsl(offset, b)) 121cabdff1aSopenharmony_ci#else 122cabdff1aSopenharmony_ci#define VEC_LD(offset,b) \ 123cabdff1aSopenharmony_ci vec_vsx_ld(offset, b) 124cabdff1aSopenharmony_ci#endif 125cabdff1aSopenharmony_ci 126cabdff1aSopenharmony_ci/** @brief loads unaligned vector @a *src with offset @a offset 127cabdff1aSopenharmony_ci and returns it */ 128cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN 129cabdff1aSopenharmony_cistatic inline vec_u8 unaligned_load(int offset, const uint8_t *src) 130cabdff1aSopenharmony_ci{ 131cabdff1aSopenharmony_ci register vec_u8 first = vec_ld(offset, src); 132cabdff1aSopenharmony_ci register vec_u8 second = vec_ld(offset + 15, src); 133cabdff1aSopenharmony_ci register vec_u8 mask = vec_lvsl(offset, src); 134cabdff1aSopenharmony_ci return vec_perm(first, second, mask); 135cabdff1aSopenharmony_ci} 136cabdff1aSopenharmony_cistatic inline vec_u8 load_with_perm_vec(int offset, const uint8_t *src, vec_u8 perm_vec) 137cabdff1aSopenharmony_ci{ 138cabdff1aSopenharmony_ci vec_u8 a = vec_ld(offset, src); 139cabdff1aSopenharmony_ci vec_u8 b = vec_ld(offset + 15, src); 140cabdff1aSopenharmony_ci return vec_perm(a, b, perm_vec); 141cabdff1aSopenharmony_ci} 142cabdff1aSopenharmony_ci#else 143cabdff1aSopenharmony_ci#define unaligned_load(a,b) VEC_LD(a,b) 144cabdff1aSopenharmony_ci#define load_with_perm_vec(a,b,c) VEC_LD(a,b) 145cabdff1aSopenharmony_ci#endif 146cabdff1aSopenharmony_ci 147cabdff1aSopenharmony_ci 148cabdff1aSopenharmony_ci/** 149cabdff1aSopenharmony_ci * loads vector known misalignment 150cabdff1aSopenharmony_ci * @param perm_vec the align permute vector to combine the two loads from lvsl 151cabdff1aSopenharmony_ci */ 152cabdff1aSopenharmony_ci 153cabdff1aSopenharmony_ci#define vec_unaligned_load(b) VEC_LD(0, b) 154cabdff1aSopenharmony_ci 155cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN 156cabdff1aSopenharmony_ci#define VEC_MERGEH(a, b) vec_mergeh(a, b) 157cabdff1aSopenharmony_ci#define VEC_MERGEL(a, b) vec_mergel(a, b) 158cabdff1aSopenharmony_ci#else 159cabdff1aSopenharmony_ci#define VEC_MERGEH(a, b) vec_mergeh(b, a) 160cabdff1aSopenharmony_ci#define VEC_MERGEL(a, b) vec_mergel(b, a) 161cabdff1aSopenharmony_ci#endif 162cabdff1aSopenharmony_ci 163cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN 164cabdff1aSopenharmony_ci#define VEC_ST(a,b,c) vec_st(a,b,c) 165cabdff1aSopenharmony_ci#else 166cabdff1aSopenharmony_ci#define VEC_ST(a,b,c) vec_vsx_st(a,b,c) 167cabdff1aSopenharmony_ci#endif 168cabdff1aSopenharmony_ci 169cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN 170cabdff1aSopenharmony_ci#define VEC_SPLAT16(a,b) vec_splat((vec_s16)(a), b) 171cabdff1aSopenharmony_ci#else 172cabdff1aSopenharmony_ci#define VEC_SPLAT16(a,b) vec_splat((vec_s16)(vec_perm(a, a, vcswapi2s(0,1,2,3))), b) 173cabdff1aSopenharmony_ci#endif 174cabdff1aSopenharmony_ci 175cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN 176cabdff1aSopenharmony_ci#define VEC_SLD16(a,b,c) vec_sld(a, b, c) 177cabdff1aSopenharmony_ci#else 178cabdff1aSopenharmony_ci#define VEC_SLD16(a,b,c) vec_sld(b, a, c) 179cabdff1aSopenharmony_ci#endif 180cabdff1aSopenharmony_ci 181cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */ 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci#if HAVE_VSX 184cabdff1aSopenharmony_ci#if HAVE_BIGENDIAN 185cabdff1aSopenharmony_ci#define vsx_ld_u8_s16(off, p) \ 186cabdff1aSopenharmony_ci ((vec_s16)vec_mergeh((vec_u8)vec_splat_u8(0), \ 187cabdff1aSopenharmony_ci (vec_u8)vec_vsx_ld((off), (p)))) 188cabdff1aSopenharmony_ci#else 189cabdff1aSopenharmony_ci#define vsx_ld_u8_s16(off, p) \ 190cabdff1aSopenharmony_ci ((vec_s16)vec_mergeh((vec_u8)vec_vsx_ld((off), (p)), \ 191cabdff1aSopenharmony_ci (vec_u8)vec_splat_u8(0))) 192cabdff1aSopenharmony_ci#endif /* HAVE_BIGENDIAN */ 193cabdff1aSopenharmony_ci#endif /* HAVE_VSX */ 194cabdff1aSopenharmony_ci 195cabdff1aSopenharmony_ci#endif /* AVUTIL_PPC_UTIL_ALTIVEC_H */ 196