1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * This file is part of FFmpeg. 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 5cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 6cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 7cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 10cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 11cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12cabdff1aSopenharmony_ci * Lesser General Public License for more details. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 15cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 16cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17cabdff1aSopenharmony_ci */ 18cabdff1aSopenharmony_ci 19cabdff1aSopenharmony_ci#include <stdint.h> 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 22cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h" 23cabdff1aSopenharmony_ci#include "libavcodec/vc1dsp.h" 24cabdff1aSopenharmony_ci#include "vc1dsp.h" 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x8_neon(int16_t *block); 27cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x8_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); 28cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); 29cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x4_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); 32cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); 33cabdff1aSopenharmony_civoid ff_vc1_inv_trans_8x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); 34cabdff1aSopenharmony_civoid ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); 35cabdff1aSopenharmony_ci 36cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter4_neon(uint8_t *src, int stride, int pq); 37cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter4_neon(uint8_t *src, int stride, int pq); 38cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter8_neon(uint8_t *src, int stride, int pq); 39cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter8_neon(uint8_t *src, int stride, int pq); 40cabdff1aSopenharmony_civoid ff_vc1_v_loop_filter16_neon(uint8_t *src, int stride, int pq); 41cabdff1aSopenharmony_civoid ff_vc1_h_loop_filter16_neon(uint8_t *src, int stride, int pq); 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_civoid ff_put_pixels8x8_neon(uint8_t *block, const uint8_t *pixels, 44cabdff1aSopenharmony_ci ptrdiff_t line_size, int rnd); 45cabdff1aSopenharmony_ci 46cabdff1aSopenharmony_ci#define DECL_PUT(X, Y) \ 47cabdff1aSopenharmony_civoid ff_put_vc1_mspel_mc##X##Y##_neon(uint8_t *dst, const uint8_t *src, \ 48cabdff1aSopenharmony_ci ptrdiff_t stride, int rnd); \ 49cabdff1aSopenharmony_cistatic void ff_put_vc1_mspel_mc##X##Y##_16_neon(uint8_t *dst, const uint8_t *src, \ 50cabdff1aSopenharmony_ci ptrdiff_t stride, int rnd) \ 51cabdff1aSopenharmony_ci{ \ 52cabdff1aSopenharmony_ci ff_put_vc1_mspel_mc##X##Y##_neon(dst+0, src+0, stride, rnd); \ 53cabdff1aSopenharmony_ci ff_put_vc1_mspel_mc##X##Y##_neon(dst+8, src+8, stride, rnd); \ 54cabdff1aSopenharmony_ci dst += 8*stride; src += 8*stride; \ 55cabdff1aSopenharmony_ci ff_put_vc1_mspel_mc##X##Y##_neon(dst+0, src+0, stride, rnd); \ 56cabdff1aSopenharmony_ci ff_put_vc1_mspel_mc##X##Y##_neon(dst+8, src+8, stride, rnd); \ 57cabdff1aSopenharmony_ci} 58cabdff1aSopenharmony_ci 59cabdff1aSopenharmony_ciDECL_PUT(1, 0) 60cabdff1aSopenharmony_ciDECL_PUT(2, 0) 61cabdff1aSopenharmony_ciDECL_PUT(3, 0) 62cabdff1aSopenharmony_ci 63cabdff1aSopenharmony_ciDECL_PUT(0, 1) 64cabdff1aSopenharmony_ciDECL_PUT(0, 2) 65cabdff1aSopenharmony_ciDECL_PUT(0, 3) 66cabdff1aSopenharmony_ci 67cabdff1aSopenharmony_ciDECL_PUT(1, 1) 68cabdff1aSopenharmony_ciDECL_PUT(1, 2) 69cabdff1aSopenharmony_ciDECL_PUT(1, 3) 70cabdff1aSopenharmony_ci 71cabdff1aSopenharmony_ciDECL_PUT(2, 1) 72cabdff1aSopenharmony_ciDECL_PUT(2, 2) 73cabdff1aSopenharmony_ciDECL_PUT(2, 3) 74cabdff1aSopenharmony_ci 75cabdff1aSopenharmony_ciDECL_PUT(3, 1) 76cabdff1aSopenharmony_ciDECL_PUT(3, 2) 77cabdff1aSopenharmony_ciDECL_PUT(3, 3) 78cabdff1aSopenharmony_ci 79cabdff1aSopenharmony_civoid ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 80cabdff1aSopenharmony_ci int h, int x, int y); 81cabdff1aSopenharmony_civoid ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 82cabdff1aSopenharmony_ci int h, int x, int y); 83cabdff1aSopenharmony_civoid ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 84cabdff1aSopenharmony_ci int h, int x, int y); 85cabdff1aSopenharmony_civoid ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, 86cabdff1aSopenharmony_ci int h, int x, int y); 87cabdff1aSopenharmony_ci 88cabdff1aSopenharmony_ciint ff_vc1_unescape_buffer_helper_neon(const uint8_t *src, int size, uint8_t *dst); 89cabdff1aSopenharmony_ci 90cabdff1aSopenharmony_cistatic int vc1_unescape_buffer_neon(const uint8_t *src, int size, uint8_t *dst) 91cabdff1aSopenharmony_ci{ 92cabdff1aSopenharmony_ci /* Dealing with starting and stopping, and removing escape bytes, are 93cabdff1aSopenharmony_ci * comparatively less time-sensitive, so are more clearly expressed using 94cabdff1aSopenharmony_ci * a C wrapper around the assembly inner loop. Note that we assume a 95cabdff1aSopenharmony_ci * little-endian machine that supports unaligned loads. */ 96cabdff1aSopenharmony_ci int dsize = 0; 97cabdff1aSopenharmony_ci while (size >= 4) 98cabdff1aSopenharmony_ci { 99cabdff1aSopenharmony_ci int found = 0; 100cabdff1aSopenharmony_ci while (!found && (((uintptr_t) dst) & 7) && size >= 4) 101cabdff1aSopenharmony_ci { 102cabdff1aSopenharmony_ci found = (AV_RL32(src) &~ 0x03000000) == 0x00030000; 103cabdff1aSopenharmony_ci if (!found) 104cabdff1aSopenharmony_ci { 105cabdff1aSopenharmony_ci *dst++ = *src++; 106cabdff1aSopenharmony_ci --size; 107cabdff1aSopenharmony_ci ++dsize; 108cabdff1aSopenharmony_ci } 109cabdff1aSopenharmony_ci } 110cabdff1aSopenharmony_ci if (!found) 111cabdff1aSopenharmony_ci { 112cabdff1aSopenharmony_ci int skip = size - ff_vc1_unescape_buffer_helper_neon(src, size, dst); 113cabdff1aSopenharmony_ci dst += skip; 114cabdff1aSopenharmony_ci src += skip; 115cabdff1aSopenharmony_ci size -= skip; 116cabdff1aSopenharmony_ci dsize += skip; 117cabdff1aSopenharmony_ci while (!found && size >= 4) 118cabdff1aSopenharmony_ci { 119cabdff1aSopenharmony_ci found = (AV_RL32(src) &~ 0x03000000) == 0x00030000; 120cabdff1aSopenharmony_ci if (!found) 121cabdff1aSopenharmony_ci { 122cabdff1aSopenharmony_ci *dst++ = *src++; 123cabdff1aSopenharmony_ci --size; 124cabdff1aSopenharmony_ci ++dsize; 125cabdff1aSopenharmony_ci } 126cabdff1aSopenharmony_ci } 127cabdff1aSopenharmony_ci } 128cabdff1aSopenharmony_ci if (found) 129cabdff1aSopenharmony_ci { 130cabdff1aSopenharmony_ci *dst++ = *src++; 131cabdff1aSopenharmony_ci *dst++ = *src++; 132cabdff1aSopenharmony_ci ++src; 133cabdff1aSopenharmony_ci size -= 3; 134cabdff1aSopenharmony_ci dsize += 2; 135cabdff1aSopenharmony_ci } 136cabdff1aSopenharmony_ci } 137cabdff1aSopenharmony_ci while (size > 0) 138cabdff1aSopenharmony_ci { 139cabdff1aSopenharmony_ci *dst++ = *src++; 140cabdff1aSopenharmony_ci --size; 141cabdff1aSopenharmony_ci ++dsize; 142cabdff1aSopenharmony_ci } 143cabdff1aSopenharmony_ci return dsize; 144cabdff1aSopenharmony_ci} 145cabdff1aSopenharmony_ci 146cabdff1aSopenharmony_ci#define FN_ASSIGN(X, Y) \ 147cabdff1aSopenharmony_ci dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = ff_put_vc1_mspel_mc##X##Y##_16_neon; \ 148cabdff1aSopenharmony_ci dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = ff_put_vc1_mspel_mc##X##Y##_neon 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_ciav_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp) 151cabdff1aSopenharmony_ci{ 152cabdff1aSopenharmony_ci dsp->vc1_inv_trans_8x8 = ff_vc1_inv_trans_8x8_neon; 153cabdff1aSopenharmony_ci dsp->vc1_inv_trans_4x8 = ff_vc1_inv_trans_4x8_neon; 154cabdff1aSopenharmony_ci dsp->vc1_inv_trans_8x4 = ff_vc1_inv_trans_8x4_neon; 155cabdff1aSopenharmony_ci dsp->vc1_inv_trans_4x4 = ff_vc1_inv_trans_4x4_neon; 156cabdff1aSopenharmony_ci dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_neon; 157cabdff1aSopenharmony_ci dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_neon; 158cabdff1aSopenharmony_ci dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_neon; 159cabdff1aSopenharmony_ci dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_neon; 160cabdff1aSopenharmony_ci 161cabdff1aSopenharmony_ci dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_neon; 162cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_neon; 163cabdff1aSopenharmony_ci dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_neon; 164cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_neon; 165cabdff1aSopenharmony_ci dsp->vc1_v_loop_filter16 = ff_vc1_v_loop_filter16_neon; 166cabdff1aSopenharmony_ci dsp->vc1_h_loop_filter16 = ff_vc1_h_loop_filter16_neon; 167cabdff1aSopenharmony_ci 168cabdff1aSopenharmony_ci dsp->put_vc1_mspel_pixels_tab[1][ 0] = ff_put_pixels8x8_neon; 169cabdff1aSopenharmony_ci FN_ASSIGN(1, 0); 170cabdff1aSopenharmony_ci FN_ASSIGN(2, 0); 171cabdff1aSopenharmony_ci FN_ASSIGN(3, 0); 172cabdff1aSopenharmony_ci 173cabdff1aSopenharmony_ci FN_ASSIGN(0, 1); 174cabdff1aSopenharmony_ci FN_ASSIGN(1, 1); 175cabdff1aSopenharmony_ci FN_ASSIGN(2, 1); 176cabdff1aSopenharmony_ci FN_ASSIGN(3, 1); 177cabdff1aSopenharmony_ci 178cabdff1aSopenharmony_ci FN_ASSIGN(0, 2); 179cabdff1aSopenharmony_ci FN_ASSIGN(1, 2); 180cabdff1aSopenharmony_ci FN_ASSIGN(2, 2); 181cabdff1aSopenharmony_ci FN_ASSIGN(3, 2); 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci FN_ASSIGN(0, 3); 184cabdff1aSopenharmony_ci FN_ASSIGN(1, 3); 185cabdff1aSopenharmony_ci FN_ASSIGN(2, 3); 186cabdff1aSopenharmony_ci FN_ASSIGN(3, 3); 187cabdff1aSopenharmony_ci 188cabdff1aSopenharmony_ci dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon; 189cabdff1aSopenharmony_ci dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon; 190cabdff1aSopenharmony_ci dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_vc1_chroma_mc4_neon; 191cabdff1aSopenharmony_ci dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_vc1_chroma_mc4_neon; 192cabdff1aSopenharmony_ci 193cabdff1aSopenharmony_ci dsp->vc1_unescape_buffer = vc1_unescape_buffer_neon; 194cabdff1aSopenharmony_ci} 195