1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com) 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavutil/mips/generic_macros_msa.h" 22cabdff1aSopenharmony_ci#include "me_cmp_mips.h" 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_cistatic uint32_t sad_8width_msa(uint8_t *src, int32_t src_stride, 25cabdff1aSopenharmony_ci uint8_t *ref, int32_t ref_stride, 26cabdff1aSopenharmony_ci int32_t height) 27cabdff1aSopenharmony_ci{ 28cabdff1aSopenharmony_ci int32_t ht_cnt; 29cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3; 30cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci for (ht_cnt = (height >> 2); ht_cnt--;) { 33cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 34cabdff1aSopenharmony_ci src += (4 * src_stride); 35cabdff1aSopenharmony_ci LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); 36cabdff1aSopenharmony_ci ref += (4 * ref_stride); 37cabdff1aSopenharmony_ci 38cabdff1aSopenharmony_ci PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, 39cabdff1aSopenharmony_ci src0, src1, ref0, ref1); 40cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, ref0, ref1); 41cabdff1aSopenharmony_ci } 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 44cabdff1aSopenharmony_ci} 45cabdff1aSopenharmony_ci 46cabdff1aSopenharmony_cistatic uint32_t sad_16width_msa(uint8_t *src, int32_t src_stride, 47cabdff1aSopenharmony_ci uint8_t *ref, int32_t ref_stride, 48cabdff1aSopenharmony_ci int32_t height) 49cabdff1aSopenharmony_ci{ 50cabdff1aSopenharmony_ci int32_t ht_cnt; 51cabdff1aSopenharmony_ci v16u8 src0, src1, ref0, ref1; 52cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 53cabdff1aSopenharmony_ci 54cabdff1aSopenharmony_ci for (ht_cnt = (height >> 2); ht_cnt--;) { 55cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src0, src1); 56cabdff1aSopenharmony_ci src += (2 * src_stride); 57cabdff1aSopenharmony_ci LD_UB2(ref, ref_stride, ref0, ref1); 58cabdff1aSopenharmony_ci ref += (2 * ref_stride); 59cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, ref0, ref1); 60cabdff1aSopenharmony_ci 61cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src0, src1); 62cabdff1aSopenharmony_ci src += (2 * src_stride); 63cabdff1aSopenharmony_ci LD_UB2(ref, ref_stride, ref0, ref1); 64cabdff1aSopenharmony_ci ref += (2 * ref_stride); 65cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, ref0, ref1); 66cabdff1aSopenharmony_ci } 67cabdff1aSopenharmony_ci 68cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 69cabdff1aSopenharmony_ci} 70cabdff1aSopenharmony_ci 71cabdff1aSopenharmony_cistatic uint32_t sad_horiz_bilinear_filter_8width_msa(uint8_t *src, 72cabdff1aSopenharmony_ci int32_t src_stride, 73cabdff1aSopenharmony_ci uint8_t *ref, 74cabdff1aSopenharmony_ci int32_t ref_stride, 75cabdff1aSopenharmony_ci int32_t height) 76cabdff1aSopenharmony_ci{ 77cabdff1aSopenharmony_ci int32_t ht_cnt; 78cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, comp0, comp1; 79cabdff1aSopenharmony_ci v16u8 ref0, ref1, ref2, ref3, ref4, ref5; 80cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 81cabdff1aSopenharmony_ci 82cabdff1aSopenharmony_ci for (ht_cnt = (height >> 3); ht_cnt--;) { 83cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 84cabdff1aSopenharmony_ci src += (4 * src_stride); 85cabdff1aSopenharmony_ci LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); 86cabdff1aSopenharmony_ci ref += (4 * ref_stride); 87cabdff1aSopenharmony_ci 88cabdff1aSopenharmony_ci PCKEV_D2_UB(src1, src0, src3, src2, src0, src1); 89cabdff1aSopenharmony_ci PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref4, ref5); 90cabdff1aSopenharmony_ci SLDI_B4_UB(ref0, ref0, ref1, ref1, ref2, ref2, ref3, ref3, 1, 91cabdff1aSopenharmony_ci ref0, ref1, ref2, ref3); 92cabdff1aSopenharmony_ci PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); 93cabdff1aSopenharmony_ci AVER_UB2_UB(ref4, ref0, ref5, ref1, comp0, comp1); 94cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 95cabdff1aSopenharmony_ci 96cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 97cabdff1aSopenharmony_ci src += (4 * src_stride); 98cabdff1aSopenharmony_ci LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); 99cabdff1aSopenharmony_ci ref += (4 * ref_stride); 100cabdff1aSopenharmony_ci 101cabdff1aSopenharmony_ci PCKEV_D2_UB(src1, src0, src3, src2, src0, src1); 102cabdff1aSopenharmony_ci PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref4, ref5); 103cabdff1aSopenharmony_ci SLDI_B4_UB(ref0, ref0, ref1, ref1, ref2, ref2, ref3, ref3, 1, 104cabdff1aSopenharmony_ci ref0, ref1, ref2, ref3); 105cabdff1aSopenharmony_ci PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); 106cabdff1aSopenharmony_ci AVER_UB2_UB(ref4, ref0, ref5, ref1, comp0, comp1); 107cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 108cabdff1aSopenharmony_ci } 109cabdff1aSopenharmony_ci 110cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 111cabdff1aSopenharmony_ci} 112cabdff1aSopenharmony_ci 113cabdff1aSopenharmony_cistatic uint32_t sad_horiz_bilinear_filter_16width_msa(uint8_t *src, 114cabdff1aSopenharmony_ci int32_t src_stride, 115cabdff1aSopenharmony_ci uint8_t *ref, 116cabdff1aSopenharmony_ci int32_t ref_stride, 117cabdff1aSopenharmony_ci int32_t height) 118cabdff1aSopenharmony_ci{ 119cabdff1aSopenharmony_ci int32_t ht_cnt; 120cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, comp0, comp1; 121cabdff1aSopenharmony_ci v16u8 ref00, ref10, ref20, ref30, ref01, ref11, ref21, ref31; 122cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 123cabdff1aSopenharmony_ci 124cabdff1aSopenharmony_ci for (ht_cnt = (height >> 3); ht_cnt--;) { 125cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 126cabdff1aSopenharmony_ci src += (4 * src_stride); 127cabdff1aSopenharmony_ci LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30); 128cabdff1aSopenharmony_ci LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31); 129cabdff1aSopenharmony_ci ref += (4 * ref_stride); 130cabdff1aSopenharmony_ci 131cabdff1aSopenharmony_ci AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1); 132cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 133cabdff1aSopenharmony_ci AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1); 134cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src2, src3, comp0, comp1); 135cabdff1aSopenharmony_ci 136cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 137cabdff1aSopenharmony_ci src += (4 * src_stride); 138cabdff1aSopenharmony_ci LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30); 139cabdff1aSopenharmony_ci LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31); 140cabdff1aSopenharmony_ci ref += (4 * ref_stride); 141cabdff1aSopenharmony_ci 142cabdff1aSopenharmony_ci AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1); 143cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 144cabdff1aSopenharmony_ci AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1); 145cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src2, src3, comp0, comp1); 146cabdff1aSopenharmony_ci } 147cabdff1aSopenharmony_ci 148cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 149cabdff1aSopenharmony_ci} 150cabdff1aSopenharmony_ci 151cabdff1aSopenharmony_cistatic uint32_t sad_vert_bilinear_filter_8width_msa(uint8_t *src, 152cabdff1aSopenharmony_ci int32_t src_stride, 153cabdff1aSopenharmony_ci uint8_t *ref, 154cabdff1aSopenharmony_ci int32_t ref_stride, 155cabdff1aSopenharmony_ci int32_t height) 156cabdff1aSopenharmony_ci{ 157cabdff1aSopenharmony_ci int32_t ht_cnt; 158cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, comp0, comp1; 159cabdff1aSopenharmony_ci v16u8 ref0, ref1, ref2, ref3, ref4; 160cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci for (ht_cnt = (height >> 3); ht_cnt--;) { 163cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 164cabdff1aSopenharmony_ci src += (4 * src_stride); 165cabdff1aSopenharmony_ci LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4); 166cabdff1aSopenharmony_ci ref += (4 * ref_stride); 167cabdff1aSopenharmony_ci 168cabdff1aSopenharmony_ci PCKEV_D2_UB(src1, src0, src3, src2, src0, src1); 169cabdff1aSopenharmony_ci PCKEV_D2_UB(ref1, ref0, ref2, ref1, ref0, ref1); 170cabdff1aSopenharmony_ci PCKEV_D2_UB(ref3, ref2, ref4, ref3, ref2, ref3); 171cabdff1aSopenharmony_ci AVER_UB2_UB(ref1, ref0, ref3, ref2, comp0, comp1); 172cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 173cabdff1aSopenharmony_ci 174cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 175cabdff1aSopenharmony_ci src += (4 * src_stride); 176cabdff1aSopenharmony_ci LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4); 177cabdff1aSopenharmony_ci ref += (4 * ref_stride); 178cabdff1aSopenharmony_ci 179cabdff1aSopenharmony_ci PCKEV_D2_UB(src1, src0, src3, src2, src0, src1); 180cabdff1aSopenharmony_ci PCKEV_D2_UB(ref1, ref0, ref2, ref1, ref0, ref1); 181cabdff1aSopenharmony_ci PCKEV_D2_UB(ref3, ref2, ref4, ref3, ref2, ref3); 182cabdff1aSopenharmony_ci AVER_UB2_UB(ref1, ref0, ref3, ref2, comp0, comp1); 183cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 184cabdff1aSopenharmony_ci } 185cabdff1aSopenharmony_ci 186cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 187cabdff1aSopenharmony_ci} 188cabdff1aSopenharmony_ci 189cabdff1aSopenharmony_cistatic uint32_t sad_vert_bilinear_filter_16width_msa(uint8_t *src, 190cabdff1aSopenharmony_ci int32_t src_stride, 191cabdff1aSopenharmony_ci uint8_t *ref, 192cabdff1aSopenharmony_ci int32_t ref_stride, 193cabdff1aSopenharmony_ci int32_t height) 194cabdff1aSopenharmony_ci{ 195cabdff1aSopenharmony_ci int32_t ht_cnt; 196cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, comp0, comp1; 197cabdff1aSopenharmony_ci v16u8 ref0, ref1, ref2, ref3, ref4; 198cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 199cabdff1aSopenharmony_ci 200cabdff1aSopenharmony_ci for (ht_cnt = (height >> 3); ht_cnt--;) { 201cabdff1aSopenharmony_ci LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3); 202cabdff1aSopenharmony_ci ref += (5 * ref_stride); 203cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 204cabdff1aSopenharmony_ci src += (4 * src_stride); 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci AVER_UB2_UB(ref0, ref4, ref1, ref0, comp0, comp1); 207cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 208cabdff1aSopenharmony_ci AVER_UB2_UB(ref2, ref1, ref3, ref2, comp0, comp1); 209cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src2, src3, comp0, comp1); 210cabdff1aSopenharmony_ci 211cabdff1aSopenharmony_ci ref4 = ref3; 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_ci LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); 214cabdff1aSopenharmony_ci ref += (3 * ref_stride); 215cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 216cabdff1aSopenharmony_ci src += (4 * src_stride); 217cabdff1aSopenharmony_ci 218cabdff1aSopenharmony_ci AVER_UB2_UB(ref0, ref4, ref1, ref0, comp0, comp1); 219cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src0, src1, comp0, comp1); 220cabdff1aSopenharmony_ci AVER_UB2_UB(ref2, ref1, ref3, ref2, comp0, comp1); 221cabdff1aSopenharmony_ci sad += SAD_UB2_UH(src2, src3, comp0, comp1); 222cabdff1aSopenharmony_ci } 223cabdff1aSopenharmony_ci 224cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 225cabdff1aSopenharmony_ci} 226cabdff1aSopenharmony_ci 227cabdff1aSopenharmony_cistatic uint32_t sad_hv_bilinear_filter_8width_msa(uint8_t *src, 228cabdff1aSopenharmony_ci int32_t src_stride, 229cabdff1aSopenharmony_ci uint8_t *ref, 230cabdff1aSopenharmony_ci int32_t ref_stride, 231cabdff1aSopenharmony_ci int32_t height) 232cabdff1aSopenharmony_ci{ 233cabdff1aSopenharmony_ci int32_t ht_cnt; 234cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, temp0, temp1, diff; 235cabdff1aSopenharmony_ci v16u8 ref0, ref1, ref2, ref3, ref4; 236cabdff1aSopenharmony_ci v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; 237cabdff1aSopenharmony_ci v8u16 comp0, comp1, comp2, comp3; 238cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 239cabdff1aSopenharmony_ci 240cabdff1aSopenharmony_ci for (ht_cnt = (height >> 2); ht_cnt--;) { 241cabdff1aSopenharmony_ci LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3); 242cabdff1aSopenharmony_ci ref += (4 * ref_stride); 243cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 244cabdff1aSopenharmony_ci src += (4 * src_stride); 245cabdff1aSopenharmony_ci 246cabdff1aSopenharmony_ci PCKEV_D2_UB(src1, src0, src3, src2, src0, src1); 247cabdff1aSopenharmony_ci 248cabdff1aSopenharmony_ci VSHF_B2_UB(ref4, ref4, ref0, ref0, mask, mask, temp0, temp1); 249cabdff1aSopenharmony_ci comp0 = __msa_hadd_u_h(temp0, temp0); 250cabdff1aSopenharmony_ci comp1 = __msa_hadd_u_h(temp1, temp1); 251cabdff1aSopenharmony_ci comp0 += comp1; 252cabdff1aSopenharmony_ci comp0 = (v8u16) __msa_srari_h((v8i16) comp0, 2); 253cabdff1aSopenharmony_ci comp0 = (v8u16) __msa_pckev_b((v16i8) comp0, (v16i8) comp0); 254cabdff1aSopenharmony_ci 255cabdff1aSopenharmony_ci temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref1, (v16i8) ref1); 256cabdff1aSopenharmony_ci comp2 = __msa_hadd_u_h(temp0, temp0); 257cabdff1aSopenharmony_ci comp1 += comp2; 258cabdff1aSopenharmony_ci comp1 = (v8u16) __msa_srari_h((v8i16) comp1, 2); 259cabdff1aSopenharmony_ci comp1 = (v8u16) __msa_pckev_b((v16i8) comp1, (v16i8) comp1); 260cabdff1aSopenharmony_ci comp1 = (v8u16) __msa_pckev_d((v2i64) comp1, (v2i64) comp0); 261cabdff1aSopenharmony_ci diff = (v16u8) __msa_asub_u_b(src0, (v16u8) comp1); 262cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 263cabdff1aSopenharmony_ci 264cabdff1aSopenharmony_ci temp1 = (v16u8) __msa_vshf_b(mask, (v16i8) ref2, (v16i8) ref2); 265cabdff1aSopenharmony_ci comp3 = __msa_hadd_u_h(temp1, temp1); 266cabdff1aSopenharmony_ci comp2 += comp3; 267cabdff1aSopenharmony_ci comp2 = (v8u16) __msa_srari_h((v8i16) comp2, 2); 268cabdff1aSopenharmony_ci comp2 = (v8u16) __msa_pckev_b((v16i8) comp2, (v16i8) comp2); 269cabdff1aSopenharmony_ci 270cabdff1aSopenharmony_ci temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref3, (v16i8) ref3); 271cabdff1aSopenharmony_ci comp0 = __msa_hadd_u_h(temp0, temp0); 272cabdff1aSopenharmony_ci comp3 += comp0; 273cabdff1aSopenharmony_ci comp3 = (v8u16) __msa_srari_h((v8i16) comp3, 2); 274cabdff1aSopenharmony_ci comp3 = (v8u16) __msa_pckev_b((v16i8) comp3, (v16i8) comp3); 275cabdff1aSopenharmony_ci comp3 = (v8u16) __msa_pckev_d((v2i64) comp3, (v2i64) comp2); 276cabdff1aSopenharmony_ci diff = (v16u8) __msa_asub_u_b(src1, (v16u8) comp3); 277cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 278cabdff1aSopenharmony_ci } 279cabdff1aSopenharmony_ci 280cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 281cabdff1aSopenharmony_ci} 282cabdff1aSopenharmony_ci 283cabdff1aSopenharmony_cistatic uint32_t sad_hv_bilinear_filter_16width_msa(uint8_t *src, 284cabdff1aSopenharmony_ci int32_t src_stride, 285cabdff1aSopenharmony_ci uint8_t *ref, 286cabdff1aSopenharmony_ci int32_t ref_stride, 287cabdff1aSopenharmony_ci int32_t height) 288cabdff1aSopenharmony_ci{ 289cabdff1aSopenharmony_ci int32_t ht_cnt; 290cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, comp, diff; 291cabdff1aSopenharmony_ci v16u8 temp0, temp1, temp2, temp3; 292cabdff1aSopenharmony_ci v16u8 ref00, ref01, ref02, ref03, ref04, ref10, ref11, ref12, ref13, ref14; 293cabdff1aSopenharmony_ci v8u16 comp0, comp1, comp2, comp3; 294cabdff1aSopenharmony_ci v8u16 sad = { 0 }; 295cabdff1aSopenharmony_ci 296cabdff1aSopenharmony_ci for (ht_cnt = (height >> 3); ht_cnt--;) { 297cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 298cabdff1aSopenharmony_ci src += (4 * src_stride); 299cabdff1aSopenharmony_ci LD_UB5(ref, ref_stride, ref04, ref00, ref01, ref02, ref03); 300cabdff1aSopenharmony_ci LD_UB5(ref + 1, ref_stride, ref14, ref10, ref11, ref12, ref13); 301cabdff1aSopenharmony_ci ref += (5 * ref_stride); 302cabdff1aSopenharmony_ci 303cabdff1aSopenharmony_ci ILVRL_B2_UB(ref14, ref04, temp0, temp1); 304cabdff1aSopenharmony_ci comp0 = __msa_hadd_u_h(temp0, temp0); 305cabdff1aSopenharmony_ci comp1 = __msa_hadd_u_h(temp1, temp1); 306cabdff1aSopenharmony_ci ILVRL_B2_UB(ref10, ref00, temp2, temp3); 307cabdff1aSopenharmony_ci comp2 = __msa_hadd_u_h(temp2, temp2); 308cabdff1aSopenharmony_ci comp3 = __msa_hadd_u_h(temp3, temp3); 309cabdff1aSopenharmony_ci comp0 += comp2; 310cabdff1aSopenharmony_ci comp1 += comp3; 311cabdff1aSopenharmony_ci SRARI_H2_UH(comp0, comp1, 2); 312cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0); 313cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src0, comp); 314cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 315cabdff1aSopenharmony_ci 316cabdff1aSopenharmony_ci ILVRL_B2_UB(ref11, ref01, temp0, temp1); 317cabdff1aSopenharmony_ci comp0 = __msa_hadd_u_h(temp0, temp0); 318cabdff1aSopenharmony_ci comp1 = __msa_hadd_u_h(temp1, temp1); 319cabdff1aSopenharmony_ci comp2 += comp0; 320cabdff1aSopenharmony_ci comp3 += comp1; 321cabdff1aSopenharmony_ci SRARI_H2_UH(comp2, comp3, 2); 322cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2); 323cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src1, comp); 324cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 325cabdff1aSopenharmony_ci 326cabdff1aSopenharmony_ci ILVRL_B2_UB(ref12, ref02, temp2, temp3); 327cabdff1aSopenharmony_ci comp2 = __msa_hadd_u_h(temp2, temp2); 328cabdff1aSopenharmony_ci comp3 = __msa_hadd_u_h(temp3, temp3); 329cabdff1aSopenharmony_ci comp0 += comp2; 330cabdff1aSopenharmony_ci comp1 += comp3; 331cabdff1aSopenharmony_ci SRARI_H2_UH(comp0, comp1, 2); 332cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0); 333cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src2, comp); 334cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 335cabdff1aSopenharmony_ci 336cabdff1aSopenharmony_ci ILVRL_B2_UB(ref13, ref03, temp0, temp1); 337cabdff1aSopenharmony_ci comp0 = __msa_hadd_u_h(temp0, temp0); 338cabdff1aSopenharmony_ci comp1 = __msa_hadd_u_h(temp1, temp1); 339cabdff1aSopenharmony_ci comp2 += comp0; 340cabdff1aSopenharmony_ci comp3 += comp1; 341cabdff1aSopenharmony_ci SRARI_H2_UH(comp2, comp3, 2); 342cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2); 343cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src3, comp); 344cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 345cabdff1aSopenharmony_ci 346cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 347cabdff1aSopenharmony_ci src += (4 * src_stride); 348cabdff1aSopenharmony_ci LD_UB4(ref, ref_stride, ref00, ref01, ref02, ref03); 349cabdff1aSopenharmony_ci LD_UB4(ref + 1, ref_stride, ref10, ref11, ref12, ref13); 350cabdff1aSopenharmony_ci ref += (3 * ref_stride); 351cabdff1aSopenharmony_ci 352cabdff1aSopenharmony_ci ILVRL_B2_UB(ref10, ref00, temp2, temp3); 353cabdff1aSopenharmony_ci comp2 = __msa_hadd_u_h(temp2, temp2); 354cabdff1aSopenharmony_ci comp3 = __msa_hadd_u_h(temp3, temp3); 355cabdff1aSopenharmony_ci comp0 += comp2; 356cabdff1aSopenharmony_ci comp1 += comp3; 357cabdff1aSopenharmony_ci SRARI_H2_UH(comp0, comp1, 2); 358cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0); 359cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src0, comp); 360cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 361cabdff1aSopenharmony_ci 362cabdff1aSopenharmony_ci ILVRL_B2_UB(ref11, ref01, temp0, temp1); 363cabdff1aSopenharmony_ci comp0 = __msa_hadd_u_h(temp0, temp0); 364cabdff1aSopenharmony_ci comp1 = __msa_hadd_u_h(temp1, temp1); 365cabdff1aSopenharmony_ci comp2 += comp0; 366cabdff1aSopenharmony_ci comp3 += comp1; 367cabdff1aSopenharmony_ci SRARI_H2_UH(comp2, comp3, 2); 368cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2); 369cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src1, comp); 370cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 371cabdff1aSopenharmony_ci 372cabdff1aSopenharmony_ci ILVRL_B2_UB(ref12, ref02, temp2, temp3); 373cabdff1aSopenharmony_ci comp2 = __msa_hadd_u_h(temp2, temp2); 374cabdff1aSopenharmony_ci comp3 = __msa_hadd_u_h(temp3, temp3); 375cabdff1aSopenharmony_ci comp0 += comp2; 376cabdff1aSopenharmony_ci comp1 += comp3; 377cabdff1aSopenharmony_ci SRARI_H2_UH(comp0, comp1, 2); 378cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0); 379cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src2, comp); 380cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 381cabdff1aSopenharmony_ci 382cabdff1aSopenharmony_ci ILVRL_B2_UB(ref13, ref03, temp0, temp1); 383cabdff1aSopenharmony_ci comp0 = __msa_hadd_u_h(temp0, temp0); 384cabdff1aSopenharmony_ci comp1 = __msa_hadd_u_h(temp1, temp1); 385cabdff1aSopenharmony_ci comp2 += comp0; 386cabdff1aSopenharmony_ci comp3 += comp1; 387cabdff1aSopenharmony_ci SRARI_H2_UH(comp2, comp3, 2); 388cabdff1aSopenharmony_ci comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2); 389cabdff1aSopenharmony_ci diff = __msa_asub_u_b(src3, comp); 390cabdff1aSopenharmony_ci sad += __msa_hadd_u_h(diff, diff); 391cabdff1aSopenharmony_ci } 392cabdff1aSopenharmony_ci 393cabdff1aSopenharmony_ci return (HADD_UH_U32(sad)); 394cabdff1aSopenharmony_ci} 395cabdff1aSopenharmony_ci 396cabdff1aSopenharmony_ci#define CALC_MSE_B(src, ref, var) \ 397cabdff1aSopenharmony_ci{ \ 398cabdff1aSopenharmony_ci v16u8 src_l0_m, src_l1_m; \ 399cabdff1aSopenharmony_ci v8i16 res_l0_m, res_l1_m; \ 400cabdff1aSopenharmony_ci \ 401cabdff1aSopenharmony_ci ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \ 402cabdff1aSopenharmony_ci HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \ 403cabdff1aSopenharmony_ci DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \ 404cabdff1aSopenharmony_ci} 405cabdff1aSopenharmony_ci 406cabdff1aSopenharmony_cistatic uint32_t sse_4width_msa(uint8_t *src_ptr, int32_t src_stride, 407cabdff1aSopenharmony_ci uint8_t *ref_ptr, int32_t ref_stride, 408cabdff1aSopenharmony_ci int32_t height) 409cabdff1aSopenharmony_ci{ 410cabdff1aSopenharmony_ci int32_t ht_cnt; 411cabdff1aSopenharmony_ci uint32_t sse; 412cabdff1aSopenharmony_ci uint32_t src0, src1, src2, src3; 413cabdff1aSopenharmony_ci uint32_t ref0, ref1, ref2, ref3; 414cabdff1aSopenharmony_ci v16u8 src = { 0 }; 415cabdff1aSopenharmony_ci v16u8 ref = { 0 }; 416cabdff1aSopenharmony_ci v4i32 var = { 0 }; 417cabdff1aSopenharmony_ci 418cabdff1aSopenharmony_ci for (ht_cnt = (height >> 2); ht_cnt--;) { 419cabdff1aSopenharmony_ci LW4(src_ptr, src_stride, src0, src1, src2, src3); 420cabdff1aSopenharmony_ci src_ptr += (4 * src_stride); 421cabdff1aSopenharmony_ci LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); 422cabdff1aSopenharmony_ci ref_ptr += (4 * ref_stride); 423cabdff1aSopenharmony_ci 424cabdff1aSopenharmony_ci INSERT_W4_UB(src0, src1, src2, src3, src); 425cabdff1aSopenharmony_ci INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); 426cabdff1aSopenharmony_ci CALC_MSE_B(src, ref, var); 427cabdff1aSopenharmony_ci } 428cabdff1aSopenharmony_ci 429cabdff1aSopenharmony_ci sse = HADD_SW_S32(var); 430cabdff1aSopenharmony_ci 431cabdff1aSopenharmony_ci return sse; 432cabdff1aSopenharmony_ci} 433cabdff1aSopenharmony_ci 434cabdff1aSopenharmony_cistatic uint32_t sse_8width_msa(uint8_t *src_ptr, int32_t src_stride, 435cabdff1aSopenharmony_ci uint8_t *ref_ptr, int32_t ref_stride, 436cabdff1aSopenharmony_ci int32_t height) 437cabdff1aSopenharmony_ci{ 438cabdff1aSopenharmony_ci int32_t ht_cnt; 439cabdff1aSopenharmony_ci uint32_t sse; 440cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3; 441cabdff1aSopenharmony_ci v16u8 ref0, ref1, ref2, ref3; 442cabdff1aSopenharmony_ci v4i32 var = { 0 }; 443cabdff1aSopenharmony_ci 444cabdff1aSopenharmony_ci for (ht_cnt = (height >> 2); ht_cnt--;) { 445cabdff1aSopenharmony_ci LD_UB4(src_ptr, src_stride, src0, src1, src2, src3); 446cabdff1aSopenharmony_ci src_ptr += (4 * src_stride); 447cabdff1aSopenharmony_ci LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); 448cabdff1aSopenharmony_ci ref_ptr += (4 * ref_stride); 449cabdff1aSopenharmony_ci 450cabdff1aSopenharmony_ci PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, 451cabdff1aSopenharmony_ci src0, src1, ref0, ref1); 452cabdff1aSopenharmony_ci CALC_MSE_B(src0, ref0, var); 453cabdff1aSopenharmony_ci CALC_MSE_B(src1, ref1, var); 454cabdff1aSopenharmony_ci } 455cabdff1aSopenharmony_ci 456cabdff1aSopenharmony_ci sse = HADD_SW_S32(var); 457cabdff1aSopenharmony_ci 458cabdff1aSopenharmony_ci return sse; 459cabdff1aSopenharmony_ci} 460cabdff1aSopenharmony_ci 461cabdff1aSopenharmony_cistatic uint32_t sse_16width_msa(uint8_t *src_ptr, int32_t src_stride, 462cabdff1aSopenharmony_ci uint8_t *ref_ptr, int32_t ref_stride, 463cabdff1aSopenharmony_ci int32_t height) 464cabdff1aSopenharmony_ci{ 465cabdff1aSopenharmony_ci int32_t ht_cnt; 466cabdff1aSopenharmony_ci uint32_t sse; 467cabdff1aSopenharmony_ci v16u8 src, ref; 468cabdff1aSopenharmony_ci v4i32 var = { 0 }; 469cabdff1aSopenharmony_ci 470cabdff1aSopenharmony_ci for (ht_cnt = (height >> 2); ht_cnt--;) { 471cabdff1aSopenharmony_ci src = LD_UB(src_ptr); 472cabdff1aSopenharmony_ci src_ptr += src_stride; 473cabdff1aSopenharmony_ci ref = LD_UB(ref_ptr); 474cabdff1aSopenharmony_ci ref_ptr += ref_stride; 475cabdff1aSopenharmony_ci CALC_MSE_B(src, ref, var); 476cabdff1aSopenharmony_ci 477cabdff1aSopenharmony_ci src = LD_UB(src_ptr); 478cabdff1aSopenharmony_ci src_ptr += src_stride; 479cabdff1aSopenharmony_ci ref = LD_UB(ref_ptr); 480cabdff1aSopenharmony_ci ref_ptr += ref_stride; 481cabdff1aSopenharmony_ci CALC_MSE_B(src, ref, var); 482cabdff1aSopenharmony_ci 483cabdff1aSopenharmony_ci src = LD_UB(src_ptr); 484cabdff1aSopenharmony_ci src_ptr += src_stride; 485cabdff1aSopenharmony_ci ref = LD_UB(ref_ptr); 486cabdff1aSopenharmony_ci ref_ptr += ref_stride; 487cabdff1aSopenharmony_ci CALC_MSE_B(src, ref, var); 488cabdff1aSopenharmony_ci 489cabdff1aSopenharmony_ci src = LD_UB(src_ptr); 490cabdff1aSopenharmony_ci src_ptr += src_stride; 491cabdff1aSopenharmony_ci ref = LD_UB(ref_ptr); 492cabdff1aSopenharmony_ci ref_ptr += ref_stride; 493cabdff1aSopenharmony_ci CALC_MSE_B(src, ref, var); 494cabdff1aSopenharmony_ci } 495cabdff1aSopenharmony_ci 496cabdff1aSopenharmony_ci sse = HADD_SW_S32(var); 497cabdff1aSopenharmony_ci 498cabdff1aSopenharmony_ci return sse; 499cabdff1aSopenharmony_ci} 500cabdff1aSopenharmony_ci 501cabdff1aSopenharmony_cistatic int32_t hadamard_diff_8x8_msa(uint8_t *src, int32_t src_stride, 502cabdff1aSopenharmony_ci uint8_t *ref, int32_t ref_stride) 503cabdff1aSopenharmony_ci{ 504cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 505cabdff1aSopenharmony_ci v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7; 506cabdff1aSopenharmony_ci v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7; 507cabdff1aSopenharmony_ci v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 508cabdff1aSopenharmony_ci v8i16 sum = { 0 }; 509cabdff1aSopenharmony_ci v8i16 zero = { 0 }; 510cabdff1aSopenharmony_ci 511cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 512cabdff1aSopenharmony_ci LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7); 513cabdff1aSopenharmony_ci ILVR_B8_UH(src0, ref0, src1, ref1, src2, ref2, src3, ref3, 514cabdff1aSopenharmony_ci src4, ref4, src5, ref5, src6, ref6, src7, ref7, 515cabdff1aSopenharmony_ci diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7); 516cabdff1aSopenharmony_ci HSUB_UB4_UH(diff0, diff1, diff2, diff3, diff0, diff1, diff2, diff3); 517cabdff1aSopenharmony_ci HSUB_UB4_UH(diff4, diff5, diff6, diff7, diff4, diff5, diff6, diff7); 518cabdff1aSopenharmony_ci TRANSPOSE8x8_UH_UH(diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7, 519cabdff1aSopenharmony_ci diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7); 520cabdff1aSopenharmony_ci BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1, 521cabdff1aSopenharmony_ci temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1); 522cabdff1aSopenharmony_ci BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2, 523cabdff1aSopenharmony_ci diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2); 524cabdff1aSopenharmony_ci BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4, 525cabdff1aSopenharmony_ci temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4); 526cabdff1aSopenharmony_ci TRANSPOSE8x8_UH_UH(temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, 527cabdff1aSopenharmony_ci temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7); 528cabdff1aSopenharmony_ci BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1, 529cabdff1aSopenharmony_ci diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1); 530cabdff1aSopenharmony_ci BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2, 531cabdff1aSopenharmony_ci temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2); 532cabdff1aSopenharmony_ci ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7, 533cabdff1aSopenharmony_ci diff0, diff1, diff2, diff3); 534cabdff1aSopenharmony_ci sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7); 535cabdff1aSopenharmony_ci sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6); 536cabdff1aSopenharmony_ci sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5); 537cabdff1aSopenharmony_ci sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4); 538cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff0, zero); 539cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff1, zero); 540cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff2, zero); 541cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff3, zero); 542cabdff1aSopenharmony_ci 543cabdff1aSopenharmony_ci return (HADD_UH_U32(sum)); 544cabdff1aSopenharmony_ci} 545cabdff1aSopenharmony_ci 546cabdff1aSopenharmony_cistatic int32_t hadamard_intra_8x8_msa(uint8_t *src, int32_t src_stride, 547cabdff1aSopenharmony_ci uint8_t *ref, int32_t ref_stride) 548cabdff1aSopenharmony_ci{ 549cabdff1aSopenharmony_ci int32_t sum_res = 0; 550cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 551cabdff1aSopenharmony_ci v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7; 552cabdff1aSopenharmony_ci v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 553cabdff1aSopenharmony_ci v8i16 sum = { 0 }; 554cabdff1aSopenharmony_ci v16i8 zero = { 0 }; 555cabdff1aSopenharmony_ci 556cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 557cabdff1aSopenharmony_ci TRANSPOSE8x8_UB_UB(src0, src1, src2, src3, src4, src5, src6, src7, 558cabdff1aSopenharmony_ci src0, src1, src2, src3, src4, src5, src6, src7); 559cabdff1aSopenharmony_ci ILVR_B8_UH(zero, src0, zero, src1, zero, src2, zero, src3, 560cabdff1aSopenharmony_ci zero, src4, zero, src5, zero, src6, zero, src7, 561cabdff1aSopenharmony_ci diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7); 562cabdff1aSopenharmony_ci BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1, 563cabdff1aSopenharmony_ci temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1); 564cabdff1aSopenharmony_ci BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2, 565cabdff1aSopenharmony_ci diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2); 566cabdff1aSopenharmony_ci BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4, 567cabdff1aSopenharmony_ci temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4); 568cabdff1aSopenharmony_ci TRANSPOSE8x8_UH_UH(temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, 569cabdff1aSopenharmony_ci temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7); 570cabdff1aSopenharmony_ci BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1, 571cabdff1aSopenharmony_ci diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1); 572cabdff1aSopenharmony_ci BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2, 573cabdff1aSopenharmony_ci temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2); 574cabdff1aSopenharmony_ci ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7, 575cabdff1aSopenharmony_ci diff0, diff1, diff2, diff3); 576cabdff1aSopenharmony_ci sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7); 577cabdff1aSopenharmony_ci sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6); 578cabdff1aSopenharmony_ci sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5); 579cabdff1aSopenharmony_ci sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4); 580cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff0, (v8i16) zero); 581cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff1, (v8i16) zero); 582cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff2, (v8i16) zero); 583cabdff1aSopenharmony_ci sum += __msa_add_a_h((v8i16) diff3, (v8i16) zero); 584cabdff1aSopenharmony_ci sum_res = (HADD_UH_U32(sum)); 585cabdff1aSopenharmony_ci sum_res -= abs(temp0[0] + temp4[0]); 586cabdff1aSopenharmony_ci 587cabdff1aSopenharmony_ci return sum_res; 588cabdff1aSopenharmony_ci} 589cabdff1aSopenharmony_ci 590cabdff1aSopenharmony_ciint ff_pix_abs16_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, 591cabdff1aSopenharmony_ci ptrdiff_t stride, int height) 592cabdff1aSopenharmony_ci{ 593cabdff1aSopenharmony_ci return sad_16width_msa(src, stride, ref, stride, height); 594cabdff1aSopenharmony_ci} 595cabdff1aSopenharmony_ci 596cabdff1aSopenharmony_ciint ff_pix_abs8_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, 597cabdff1aSopenharmony_ci ptrdiff_t stride, int height) 598cabdff1aSopenharmony_ci{ 599cabdff1aSopenharmony_ci return sad_8width_msa(src, stride, ref, stride, height); 600cabdff1aSopenharmony_ci} 601cabdff1aSopenharmony_ci 602cabdff1aSopenharmony_ciint ff_pix_abs16_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 603cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 604cabdff1aSopenharmony_ci{ 605cabdff1aSopenharmony_ci return sad_horiz_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h); 606cabdff1aSopenharmony_ci} 607cabdff1aSopenharmony_ci 608cabdff1aSopenharmony_ciint ff_pix_abs16_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 609cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 610cabdff1aSopenharmony_ci{ 611cabdff1aSopenharmony_ci return sad_vert_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h); 612cabdff1aSopenharmony_ci} 613cabdff1aSopenharmony_ci 614cabdff1aSopenharmony_ciint ff_pix_abs16_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 615cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 616cabdff1aSopenharmony_ci{ 617cabdff1aSopenharmony_ci return sad_hv_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h); 618cabdff1aSopenharmony_ci} 619cabdff1aSopenharmony_ci 620cabdff1aSopenharmony_ciint ff_pix_abs8_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 621cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 622cabdff1aSopenharmony_ci{ 623cabdff1aSopenharmony_ci return sad_horiz_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h); 624cabdff1aSopenharmony_ci} 625cabdff1aSopenharmony_ci 626cabdff1aSopenharmony_ciint ff_pix_abs8_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 627cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 628cabdff1aSopenharmony_ci{ 629cabdff1aSopenharmony_ci return sad_vert_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h); 630cabdff1aSopenharmony_ci} 631cabdff1aSopenharmony_ci 632cabdff1aSopenharmony_ciint ff_pix_abs8_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, 633cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 634cabdff1aSopenharmony_ci{ 635cabdff1aSopenharmony_ci return sad_hv_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h); 636cabdff1aSopenharmony_ci} 637cabdff1aSopenharmony_ci 638cabdff1aSopenharmony_ciint ff_sse16_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, 639cabdff1aSopenharmony_ci ptrdiff_t stride, int height) 640cabdff1aSopenharmony_ci{ 641cabdff1aSopenharmony_ci return sse_16width_msa(src, stride, ref, stride, height); 642cabdff1aSopenharmony_ci} 643cabdff1aSopenharmony_ci 644cabdff1aSopenharmony_ciint ff_sse8_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, 645cabdff1aSopenharmony_ci ptrdiff_t stride, int height) 646cabdff1aSopenharmony_ci{ 647cabdff1aSopenharmony_ci return sse_8width_msa(src, stride, ref, stride, height); 648cabdff1aSopenharmony_ci} 649cabdff1aSopenharmony_ci 650cabdff1aSopenharmony_ciint ff_sse4_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, 651cabdff1aSopenharmony_ci ptrdiff_t stride, int height) 652cabdff1aSopenharmony_ci{ 653cabdff1aSopenharmony_ci return sse_4width_msa(src, stride, ref, stride, height); 654cabdff1aSopenharmony_ci} 655cabdff1aSopenharmony_ci 656cabdff1aSopenharmony_ciint ff_hadamard8_diff8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src, 657cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 658cabdff1aSopenharmony_ci{ 659cabdff1aSopenharmony_ci return hadamard_diff_8x8_msa(src, stride, dst, stride); 660cabdff1aSopenharmony_ci} 661cabdff1aSopenharmony_ci 662cabdff1aSopenharmony_ciint ff_hadamard8_intra8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src, 663cabdff1aSopenharmony_ci ptrdiff_t stride, int h) 664cabdff1aSopenharmony_ci{ 665cabdff1aSopenharmony_ci return hadamard_intra_8x8_msa(src, stride, dst, stride); 666cabdff1aSopenharmony_ci} 667cabdff1aSopenharmony_ci 668cabdff1aSopenharmony_ci/* Hadamard Transform functions */ 669cabdff1aSopenharmony_ci#define WRAPPER8_16_SQ(name8, name16) \ 670cabdff1aSopenharmony_ciint name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \ 671cabdff1aSopenharmony_ci ptrdiff_t stride, int h) \ 672cabdff1aSopenharmony_ci{ \ 673cabdff1aSopenharmony_ci int score = 0; \ 674cabdff1aSopenharmony_ci score += name8(s, dst, src, stride, 8); \ 675cabdff1aSopenharmony_ci score += name8(s, dst + 8, src + 8, stride, 8); \ 676cabdff1aSopenharmony_ci if(h == 16) { \ 677cabdff1aSopenharmony_ci dst += 8 * stride; \ 678cabdff1aSopenharmony_ci src += 8 * stride; \ 679cabdff1aSopenharmony_ci score +=name8(s, dst, src, stride, 8); \ 680cabdff1aSopenharmony_ci score +=name8(s, dst + 8, src + 8, stride, 8); \ 681cabdff1aSopenharmony_ci } \ 682cabdff1aSopenharmony_ci return score; \ 683cabdff1aSopenharmony_ci} 684cabdff1aSopenharmony_ci 685cabdff1aSopenharmony_ciWRAPPER8_16_SQ(ff_hadamard8_diff8x8_msa, ff_hadamard8_diff16_msa); 686cabdff1aSopenharmony_ciWRAPPER8_16_SQ(ff_hadamard8_intra8x8_msa, ff_hadamard8_intra16_msa); 687