1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2010 Mans Rullgard 3cabdff1aSopenharmony_ci * Copyright (c) 2014 James Yu <james.yu@linaro.org> 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci#include <arm_neon.h> 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "config.h" 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 27cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 28cabdff1aSopenharmony_ci#if ARCH_AARCH64 29cabdff1aSopenharmony_ci# include "libavutil/aarch64/cpu.h" 30cabdff1aSopenharmony_ci#elif ARCH_ARM 31cabdff1aSopenharmony_ci# include "libavutil/arm/cpu.h" 32cabdff1aSopenharmony_ci#endif 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_ci#include "libavcodec/mpegvideo.h" 35cabdff1aSopenharmony_ci 
/**
 * Dequantize eight H.263 coefficients held in one 128-bit NEON register.
 *
 * Every non-zero lane c becomes c * qmul + (c < 0 ? qadd_neg : qadd_pos);
 * lanes that are zero are returned unchanged.
 *
 * @param coef     eight quantized coefficients
 * @param qmul     multiplier, replicated in every lane
 * @param qadd_pos value added to positive coefficients, in every lane
 * @param qadd_neg value added to negative coefficients, in every lane
 * @return the eight dequantized coefficients
 */
static inline int16x8_t h263_unquantize_s16x8(int16x8_t coef, int16x8_t qmul,
                                              int16x8_t qadd_pos,
                                              int16x8_t qadd_neg)
{
    const int16x8_t  zero    = vdupq_n_s16(0);
    const uint16x8_t is_neg  = vcltq_s16(coef, zero);
    const uint16x8_t is_zero = vceqq_s16(coef, zero);
    /* Pick the sign-matching rounding term for each lane. */
    const int16x8_t  bias    = vbslq_s16(is_neg, qadd_neg, qadd_pos);
    const int16x8_t  scaled  = vaddq_s16(vmulq_s16(coef, qmul), bias);

    /* Zero lanes must stay zero, so keep the input there. */
    return vbslq_s16(is_zero, coef, scaled);
}

/**
 * Dequantize four H.263 coefficients held in one 64-bit NEON register.
 *
 * Same per-lane operation as h263_unquantize_s16x8(), on four lanes.
 */
static inline int16x4_t h263_unquantize_s16x4(int16x4_t coef, int16x4_t qmul,
                                              int16x4_t qadd_pos,
                                              int16x4_t qadd_neg)
{
    const int16x4_t  zero    = vdup_n_s16(0);
    const uint16x4_t is_neg  = vclt_s16(coef, zero);
    const uint16x4_t is_zero = vceq_s16(coef, zero);
    const int16x4_t  bias    = vbsl_s16(is_neg, qadd_neg, qadd_pos);
    const int16x4_t  scaled  = vadd_s16(vmul_s16(coef, qmul), bias);

    return vbsl_s16(is_zero, coef, scaled);
}

/**
 * Dequantize the first nCoeffs coefficients of a block in place.
 *
 * Each non-zero coefficient c is replaced by
 * c * (2 * qscale) + (c < 0 ? -qadd : qadd); zeros are left untouched.
 *
 * Coefficients are handled in groups of 16, then 8, then 4, so the function
 * reads and writes past nCoeffs, up to the end of the last group it
 * processes. The caller must make sure that range is valid storage
 * (presumably block is a full 64-entry DCT block -- confirm against callers).
 *
 * @param qscale  quantizer scale; the multiplier used is qscale << 1
 * @param qadd    rounding term added with the sign of the coefficient
 * @param nCoeffs number of leading coefficients to process; nothing is done
 *                when it is <= 0
 * @param block   coefficients, modified in place
 */
static inline void ff_dct_unquantize_h263_neon(int qscale, int qadd, int nCoeffs,
                                               int16_t *block)
{
    const int16x8_t qmul     = vdupq_n_s16(qscale << 1);
    const int16x8_t qadd_pos = vdupq_n_s16(qadd);
    const int16x8_t qadd_neg = vnegq_s16(qadd_pos);

    /* Main loop: two 8-lane vectors (16 coefficients) per iteration. */
    for (; nCoeffs > 8; nCoeffs -= 16, block += 16) {
        const int16x8_t lo = vld1q_s16(block);
        const int16x8_t hi = vld1q_s16(block + 8);

        vst1q_s16(block,     h263_unquantize_s16x8(lo, qmul, qadd_pos, qadd_neg));
        vst1q_s16(block + 8, h263_unquantize_s16x8(hi, qmul, qadd_pos, qadd_neg));
    }

    if (nCoeffs > 4) {
        /*
         * 5..8 coefficients left: a single 4-lane step would leave
         * coefficients 4..7 of this group quantized, so use one full
         * 8-lane vector instead.
         */
        vst1q_s16(block, h263_unquantize_s16x8(vld1q_s16(block), qmul,
                                               qadd_pos, qadd_neg));
        return;
    }

    if (nCoeffs <= 0)
        return;

    /* 1..4 coefficients left: one 4-lane vector covers them. */
    vst1_s16(block, h263_unquantize_s16x4(vld1_s16(block),
                                          vget_low_s16(qmul),
                                          vget_low_s16(qadd_pos),
                                          vget_low_s16(qadd_neg)));
}

/**
 * NEON implementation of the H.263 inter-block dequantizer.
 *
 * @param s      codec context; supplies the scan table and the last
 *               coefficient index of block n
 * @param block  coefficients, dequantized in place
 * @param n      block number, used to index s->block_last_index
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_inter_neon(MpegEncContext *s, int16_t *block,
                                           int n, int qscale)
{
    int nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
    int qadd    = (qscale - 1) | 1;

    /* raster_end is used as a last index here, hence the + 1 to get a count. */
    ff_dct_unquantize_h263_neon(qscale, qadd, nCoeffs + 1, block);
}

/**
 * NEON implementation of the H.263 intra-block dequantizer.
 *
 * When s->h263_aic is not set, the DC coefficient is scaled by
 * s->y_dc_scale (n < 4) or s->c_dc_scale (otherwise) and the usual rounding
 * term is used; when it is set, the DC coefficient is left as is and the
 * rounding term is 0. The remaining coefficients go through the shared
 * NEON dequantizer.
 *
 * @param s      codec context
 * @param block  coefficients, dequantized in place
 * @param n      block number
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_intra_neon(MpegEncContext *s, int16_t *block,
                                           int n, int qscale)
{
    int qadd;
    int nCoeffs, blk0;

    if (!s->h263_aic) {
        if (n < 4) {
            block[0] *= s->y_dc_scale;
        } else {
            block[0] *= s->c_dc_scale;
        }
        qadd = (qscale - 1) | 1;
    } else {
        qadd = 0;
    }

    if (s->ac_pred) {
        /* With AC prediction the whole block is processed. */
        nCoeffs = 63;
    } else {
        nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
        if (nCoeffs <= 0)
            return;
    }

    /*
     * The vector routine starts at block[0] and would rescale the DC term
     * as well, so save it here and put it back afterwards.
     */
    blk0 = block[0];

    ff_dct_unquantize_h263_neon(qscale, qadd, nCoeffs + 1, block);

    block[0] = blk0;
}

/**
 * Install the NEON H.263 dequantizers into the context when the running
 * CPU reports NEON support; otherwise the context is left unchanged.
 *
 * @param s codec context whose dct_unquantize_h263_intra/inter pointers
 *          are set
 */
av_cold void ff_mpv_common_init_neon(MpegEncContext *s)
{
    int cpu_flags = av_get_cpu_flags();

    if (have_neon(cpu_flags)) {
        s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_neon;
        s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_neon;
    }
}