1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com) 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavutil/mips/generic_macros_msa.h" 22cabdff1aSopenharmony_ci#include "h263dsp_mips.h" 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_cistatic void h263_dct_unquantize_msa(int16_t *block, int16_t qmul, 25cabdff1aSopenharmony_ci int16_t qadd, int8_t n_coeffs, 26cabdff1aSopenharmony_ci uint8_t loop_start) 27cabdff1aSopenharmony_ci{ 28cabdff1aSopenharmony_ci int16_t *block_dup = block; 29cabdff1aSopenharmony_ci int32_t level, cnt; 30cabdff1aSopenharmony_ci v8i16 block_vec, qmul_vec, qadd_vec, sub; 31cabdff1aSopenharmony_ci v8i16 add, mask, mul, zero_mask; 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_ci qmul_vec = __msa_fill_h(qmul); 34cabdff1aSopenharmony_ci qadd_vec = __msa_fill_h(qadd); 35cabdff1aSopenharmony_ci for (cnt = 0; cnt < (n_coeffs >> 3); cnt++) { 36cabdff1aSopenharmony_ci block_vec = LD_SH(block_dup + loop_start); 37cabdff1aSopenharmony_ci mask = __msa_clti_s_h(block_vec, 0); 38cabdff1aSopenharmony_ci zero_mask = __msa_ceqi_h(block_vec, 0); 39cabdff1aSopenharmony_ci mul = block_vec * qmul_vec; 40cabdff1aSopenharmony_ci sub = mul - qadd_vec; 41cabdff1aSopenharmony_ci add = mul + qadd_vec; 42cabdff1aSopenharmony_ci add = (v8i16) __msa_bmnz_v((v16u8) add, (v16u8) sub, (v16u8) mask); 43cabdff1aSopenharmony_ci block_vec = (v8i16) __msa_bmnz_v((v16u8) add, (v16u8) block_vec, 44cabdff1aSopenharmony_ci (v16u8) zero_mask); 45cabdff1aSopenharmony_ci ST_SH(block_vec, block_dup + loop_start); 46cabdff1aSopenharmony_ci block_dup += 8; 47cabdff1aSopenharmony_ci } 48cabdff1aSopenharmony_ci 49cabdff1aSopenharmony_ci cnt = ((n_coeffs >> 3) * 8) + loop_start; 50cabdff1aSopenharmony_ci 51cabdff1aSopenharmony_ci for (; cnt <= n_coeffs; cnt++) { 52cabdff1aSopenharmony_ci level = block[cnt]; 53cabdff1aSopenharmony_ci if (level) { 54cabdff1aSopenharmony_ci if (level < 0) { 55cabdff1aSopenharmony_ci level = level * qmul - qadd; 56cabdff1aSopenharmony_ci } else { 57cabdff1aSopenharmony_ci level = level * qmul + qadd; 58cabdff1aSopenharmony_ci } 59cabdff1aSopenharmony_ci block[cnt] = level; 60cabdff1aSopenharmony_ci } 61cabdff1aSopenharmony_ci } 62cabdff1aSopenharmony_ci} 63cabdff1aSopenharmony_ci 64cabdff1aSopenharmony_cistatic int32_t mpeg2_dct_unquantize_inter_msa(int16_t *block, 65cabdff1aSopenharmony_ci int32_t qscale, 66cabdff1aSopenharmony_ci const int16_t *quant_matrix) 67cabdff1aSopenharmony_ci{ 68cabdff1aSopenharmony_ci int32_t cnt, sum_res = -1; 69cabdff1aSopenharmony_ci v8i16 block_vec, block_neg, qscale_vec, mask; 70cabdff1aSopenharmony_ci v8i16 block_org0, block_org1, block_org2, block_org3; 71cabdff1aSopenharmony_ci v8i16 quant_m0, quant_m1, quant_m2, quant_m3; 72cabdff1aSopenharmony_ci v8i16 sum, mul, zero_mask; 73cabdff1aSopenharmony_ci v4i32 mul_vec, qscale_l, qscale_r, quant_m_r, quant_m_l; 74cabdff1aSopenharmony_ci v4i32 block_l, block_r, sad; 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ci qscale_vec = __msa_fill_h(qscale); 77cabdff1aSopenharmony_ci for (cnt = 0; cnt < 2; cnt++) { 78cabdff1aSopenharmony_ci LD_SH4(block, 8, block_org0, block_org1, block_org2, block_org3); 79cabdff1aSopenharmony_ci LD_SH4(quant_matrix, 8, quant_m0, quant_m1, quant_m2, quant_m3); 80cabdff1aSopenharmony_ci mask = __msa_clti_s_h(block_org0, 0); 81cabdff1aSopenharmony_ci zero_mask = __msa_ceqi_h(block_org0, 0); 82cabdff1aSopenharmony_ci block_neg = -block_org0; 83cabdff1aSopenharmony_ci block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org0, (v16u8) block_neg, 84cabdff1aSopenharmony_ci (v16u8) mask); 85cabdff1aSopenharmony_ci block_vec <<= 1; 86cabdff1aSopenharmony_ci block_vec += 1; 87cabdff1aSopenharmony_ci UNPCK_SH_SW(block_vec, block_r, block_l); 88cabdff1aSopenharmony_ci UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l); 89cabdff1aSopenharmony_ci UNPCK_SH_SW(quant_m0, quant_m_r, quant_m_l); 90cabdff1aSopenharmony_ci mul_vec = block_l * qscale_l; 91cabdff1aSopenharmony_ci mul_vec *= quant_m_l; 92cabdff1aSopenharmony_ci block_l = mul_vec >> 4; 93cabdff1aSopenharmony_ci mul_vec = block_r * qscale_r; 94cabdff1aSopenharmony_ci mul_vec *= quant_m_r; 95cabdff1aSopenharmony_ci block_r = mul_vec >> 4; 96cabdff1aSopenharmony_ci mul = (v8i16) __msa_pckev_h((v8i16) block_l, (v8i16) block_r); 97cabdff1aSopenharmony_ci block_neg = - mul; 98cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg, 99cabdff1aSopenharmony_ci (v16u8) mask); 100cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org0, 101cabdff1aSopenharmony_ci (v16u8) zero_mask); 102cabdff1aSopenharmony_ci ST_SH(sum, block); 103cabdff1aSopenharmony_ci block += 8; 104cabdff1aSopenharmony_ci quant_matrix += 8; 105cabdff1aSopenharmony_ci sad = __msa_hadd_s_w(sum, sum); 106cabdff1aSopenharmony_ci sum_res += HADD_SW_S32(sad); 107cabdff1aSopenharmony_ci mask = __msa_clti_s_h(block_org1, 0); 108cabdff1aSopenharmony_ci zero_mask = __msa_ceqi_h(block_org1, 0); 109cabdff1aSopenharmony_ci block_neg = - block_org1; 110cabdff1aSopenharmony_ci block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org1, (v16u8) block_neg, 111cabdff1aSopenharmony_ci (v16u8) mask); 112cabdff1aSopenharmony_ci block_vec <<= 1; 113cabdff1aSopenharmony_ci block_vec += 1; 114cabdff1aSopenharmony_ci UNPCK_SH_SW(block_vec, block_r, block_l); 115cabdff1aSopenharmony_ci UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l); 116cabdff1aSopenharmony_ci UNPCK_SH_SW(quant_m1, quant_m_r, quant_m_l); 117cabdff1aSopenharmony_ci mul_vec = block_l * qscale_l; 118cabdff1aSopenharmony_ci mul_vec *= quant_m_l; 119cabdff1aSopenharmony_ci block_l = mul_vec >> 4; 120cabdff1aSopenharmony_ci mul_vec = block_r * qscale_r; 121cabdff1aSopenharmony_ci mul_vec *= quant_m_r; 122cabdff1aSopenharmony_ci block_r = mul_vec >> 4; 123cabdff1aSopenharmony_ci mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r); 124cabdff1aSopenharmony_ci block_neg = - mul; 125cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg, 126cabdff1aSopenharmony_ci (v16u8) mask); 127cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org1, 128cabdff1aSopenharmony_ci (v16u8) zero_mask); 129cabdff1aSopenharmony_ci ST_SH(sum, block); 130cabdff1aSopenharmony_ci 131cabdff1aSopenharmony_ci block += 8; 132cabdff1aSopenharmony_ci quant_matrix += 8; 133cabdff1aSopenharmony_ci sad = __msa_hadd_s_w(sum, sum); 134cabdff1aSopenharmony_ci sum_res += HADD_SW_S32(sad); 135cabdff1aSopenharmony_ci mask = __msa_clti_s_h(block_org2, 0); 136cabdff1aSopenharmony_ci zero_mask = __msa_ceqi_h(block_org2, 0); 137cabdff1aSopenharmony_ci block_neg = - block_org2; 138cabdff1aSopenharmony_ci block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org2, (v16u8) block_neg, 139cabdff1aSopenharmony_ci (v16u8) mask); 140cabdff1aSopenharmony_ci block_vec <<= 1; 141cabdff1aSopenharmony_ci block_vec += 1; 142cabdff1aSopenharmony_ci UNPCK_SH_SW(block_vec, block_r, block_l); 143cabdff1aSopenharmony_ci UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l); 144cabdff1aSopenharmony_ci UNPCK_SH_SW(quant_m2, quant_m_r, quant_m_l); 145cabdff1aSopenharmony_ci mul_vec = block_l * qscale_l; 146cabdff1aSopenharmony_ci mul_vec *= quant_m_l; 147cabdff1aSopenharmony_ci block_l = mul_vec >> 4; 148cabdff1aSopenharmony_ci mul_vec = block_r * qscale_r; 149cabdff1aSopenharmony_ci mul_vec *= quant_m_r; 150cabdff1aSopenharmony_ci block_r = mul_vec >> 4; 151cabdff1aSopenharmony_ci mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r); 152cabdff1aSopenharmony_ci block_neg = - mul; 153cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg, 154cabdff1aSopenharmony_ci (v16u8) mask); 155cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org2, 156cabdff1aSopenharmony_ci (v16u8) zero_mask); 157cabdff1aSopenharmony_ci ST_SH(sum, block); 158cabdff1aSopenharmony_ci 159cabdff1aSopenharmony_ci block += 8; 160cabdff1aSopenharmony_ci quant_matrix += 8; 161cabdff1aSopenharmony_ci sad = __msa_hadd_s_w(sum, sum); 162cabdff1aSopenharmony_ci sum_res += HADD_SW_S32(sad); 163cabdff1aSopenharmony_ci mask = __msa_clti_s_h(block_org3, 0); 164cabdff1aSopenharmony_ci zero_mask = __msa_ceqi_h(block_org3, 0); 165cabdff1aSopenharmony_ci block_neg = - block_org3; 166cabdff1aSopenharmony_ci block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org3, (v16u8) block_neg, 167cabdff1aSopenharmony_ci (v16u8) mask); 168cabdff1aSopenharmony_ci block_vec <<= 1; 169cabdff1aSopenharmony_ci block_vec += 1; 170cabdff1aSopenharmony_ci UNPCK_SH_SW(block_vec, block_r, block_l); 171cabdff1aSopenharmony_ci UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l); 172cabdff1aSopenharmony_ci UNPCK_SH_SW(quant_m3, quant_m_r, quant_m_l); 173cabdff1aSopenharmony_ci mul_vec = block_l * qscale_l; 174cabdff1aSopenharmony_ci mul_vec *= quant_m_l; 175cabdff1aSopenharmony_ci block_l = mul_vec >> 4; 176cabdff1aSopenharmony_ci mul_vec = block_r * qscale_r; 177cabdff1aSopenharmony_ci mul_vec *= quant_m_r; 178cabdff1aSopenharmony_ci block_r = mul_vec >> 4; 179cabdff1aSopenharmony_ci mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r); 180cabdff1aSopenharmony_ci block_neg = - mul; 181cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg, 182cabdff1aSopenharmony_ci (v16u8) mask); 183cabdff1aSopenharmony_ci sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org3, 184cabdff1aSopenharmony_ci (v16u8) zero_mask); 185cabdff1aSopenharmony_ci ST_SH(sum, block); 186cabdff1aSopenharmony_ci 187cabdff1aSopenharmony_ci block += 8; 188cabdff1aSopenharmony_ci quant_matrix += 8; 189cabdff1aSopenharmony_ci sad = __msa_hadd_s_w(sum, sum); 190cabdff1aSopenharmony_ci sum_res += HADD_SW_S32(sad); 191cabdff1aSopenharmony_ci } 192cabdff1aSopenharmony_ci 193cabdff1aSopenharmony_ci return sum_res; 194cabdff1aSopenharmony_ci} 195cabdff1aSopenharmony_ci 196cabdff1aSopenharmony_civoid ff_dct_unquantize_h263_intra_msa(MpegEncContext *s, 197cabdff1aSopenharmony_ci int16_t *block, int32_t index, 198cabdff1aSopenharmony_ci int32_t qscale) 199cabdff1aSopenharmony_ci{ 200cabdff1aSopenharmony_ci int32_t qmul, qadd; 201cabdff1aSopenharmony_ci int32_t nCoeffs; 202cabdff1aSopenharmony_ci 203cabdff1aSopenharmony_ci av_assert2(s->block_last_index[index] >= 0 || s->h263_aic); 204cabdff1aSopenharmony_ci 205cabdff1aSopenharmony_ci qmul = qscale << 1; 206cabdff1aSopenharmony_ci 207cabdff1aSopenharmony_ci if (!s->h263_aic) { 208cabdff1aSopenharmony_ci block[0] *= index < 4 ? s->y_dc_scale : s->c_dc_scale; 209cabdff1aSopenharmony_ci qadd = (qscale - 1) | 1; 210cabdff1aSopenharmony_ci } else { 211cabdff1aSopenharmony_ci qadd = 0; 212cabdff1aSopenharmony_ci } 213cabdff1aSopenharmony_ci if (s->ac_pred) 214cabdff1aSopenharmony_ci nCoeffs = 63; 215cabdff1aSopenharmony_ci else 216cabdff1aSopenharmony_ci nCoeffs = s->inter_scantable.raster_end[s->block_last_index[index]]; 217cabdff1aSopenharmony_ci 218cabdff1aSopenharmony_ci h263_dct_unquantize_msa(block, qmul, qadd, nCoeffs, 1); 219cabdff1aSopenharmony_ci} 220cabdff1aSopenharmony_ci 221cabdff1aSopenharmony_civoid ff_dct_unquantize_h263_inter_msa(MpegEncContext *s, 222cabdff1aSopenharmony_ci int16_t *block, int32_t index, 223cabdff1aSopenharmony_ci int32_t qscale) 224cabdff1aSopenharmony_ci{ 225cabdff1aSopenharmony_ci int32_t qmul, qadd; 226cabdff1aSopenharmony_ci int32_t nCoeffs; 227cabdff1aSopenharmony_ci 228cabdff1aSopenharmony_ci av_assert2(s->block_last_index[index] >= 0); 229cabdff1aSopenharmony_ci 230cabdff1aSopenharmony_ci qadd = (qscale - 1) | 1; 231cabdff1aSopenharmony_ci qmul = qscale << 1; 232cabdff1aSopenharmony_ci 233cabdff1aSopenharmony_ci nCoeffs = s->inter_scantable.raster_end[s->block_last_index[index]]; 234cabdff1aSopenharmony_ci 235cabdff1aSopenharmony_ci h263_dct_unquantize_msa(block, qmul, qadd, nCoeffs, 0); 236cabdff1aSopenharmony_ci} 237cabdff1aSopenharmony_ci 238cabdff1aSopenharmony_civoid ff_dct_unquantize_mpeg2_inter_msa(MpegEncContext *s, 239cabdff1aSopenharmony_ci int16_t *block, int32_t index, 240cabdff1aSopenharmony_ci int32_t qscale) 241cabdff1aSopenharmony_ci{ 242cabdff1aSopenharmony_ci const uint16_t *quant_matrix; 243cabdff1aSopenharmony_ci int32_t sum = -1; 244cabdff1aSopenharmony_ci 245cabdff1aSopenharmony_ci quant_matrix = s->inter_matrix; 246cabdff1aSopenharmony_ci 247cabdff1aSopenharmony_ci sum = mpeg2_dct_unquantize_inter_msa(block, qscale, quant_matrix); 248cabdff1aSopenharmony_ci 249cabdff1aSopenharmony_ci block[63] ^= sum & 1; 250cabdff1aSopenharmony_ci} 251