1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com) 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavutil/mips/generic_macros_msa.h" 22cabdff1aSopenharmony_ci#include "libavcodec/mips/hpeldsp_mips.h" 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#define PCKEV_AVG_ST_UB(in0, in1, dst, pdst) \ 25cabdff1aSopenharmony_ci{ \ 26cabdff1aSopenharmony_ci v16u8 tmp_m; \ 27cabdff1aSopenharmony_ci \ 28cabdff1aSopenharmony_ci tmp_m = (v16u8) __msa_pckev_b((v16i8) in0, (v16i8) in1); \ 29cabdff1aSopenharmony_ci tmp_m = __msa_aver_u_b(tmp_m, (v16u8) dst); \ 30cabdff1aSopenharmony_ci ST_UB(tmp_m, (pdst)); \ 31cabdff1aSopenharmony_ci} 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_ci#define PCKEV_ST_SB4(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ 34cabdff1aSopenharmony_ci{ \ 35cabdff1aSopenharmony_ci v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ 36cabdff1aSopenharmony_ci uint8_t *pdst_m = (uint8_t *) (pdst); \ 37cabdff1aSopenharmony_ci \ 38cabdff1aSopenharmony_ci PCKEV_B4_SB(in0, in1, in2, in3, in4, in5, in6, in7, \ 39cabdff1aSopenharmony_ci tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ 40cabdff1aSopenharmony_ci ST_SB4(tmp0_m, tmp1_m, tmp2_m, tmp3_m, pdst_m, stride); \ 41cabdff1aSopenharmony_ci} 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_ci#define PCKEV_AVG_ST8x4_UB(in1, dst0, in2, dst1, in3, dst2, in4, dst3, \ 44cabdff1aSopenharmony_ci pdst, stride) \ 45cabdff1aSopenharmony_ci{ \ 46cabdff1aSopenharmony_ci v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ 47cabdff1aSopenharmony_ci uint8_t *pdst_m = (uint8_t *) (pdst); \ 48cabdff1aSopenharmony_ci \ 49cabdff1aSopenharmony_ci PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m); \ 50cabdff1aSopenharmony_ci PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \ 51cabdff1aSopenharmony_ci AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \ 52cabdff1aSopenharmony_ci ST_D4(tmp0_m, tmp1_m, 0, 1, 0, 1, pdst_m, stride); \ 53cabdff1aSopenharmony_ci} 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_cistatic void common_hz_bil_4w_msa(const uint8_t *src, int32_t src_stride, 56cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 57cabdff1aSopenharmony_ci uint8_t height) 58cabdff1aSopenharmony_ci{ 59cabdff1aSopenharmony_ci uint8_t loop_cnt; 60cabdff1aSopenharmony_ci uint32_t out0, out1; 61cabdff1aSopenharmony_ci v16u8 src0, src1, src0_sld1, src1_sld1, res0, res1; 62cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 63cabdff1aSopenharmony_ci 64cabdff1aSopenharmony_ci for (loop_cnt = (height >> 1); loop_cnt--;) { 65cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src0, src1); 66cabdff1aSopenharmony_ci src += (2 * src_stride); 67cabdff1aSopenharmony_ci 68cabdff1aSopenharmony_ci SLDI_B2_UB(zeros, src0, zeros, src1, 1, src0_sld1, src1_sld1); 69cabdff1aSopenharmony_ci AVER_UB2_UB(src0_sld1, src0, src1_sld1, src1, res0, res1); 70cabdff1aSopenharmony_ci 71cabdff1aSopenharmony_ci out0 = __msa_copy_u_w((v4i32) res0, 0); 72cabdff1aSopenharmony_ci out1 = __msa_copy_u_w((v4i32) res1, 0); 73cabdff1aSopenharmony_ci SW(out0, dst); 74cabdff1aSopenharmony_ci dst += dst_stride; 75cabdff1aSopenharmony_ci SW(out1, dst); 76cabdff1aSopenharmony_ci dst += dst_stride; 77cabdff1aSopenharmony_ci } 78cabdff1aSopenharmony_ci} 79cabdff1aSopenharmony_ci 80cabdff1aSopenharmony_cistatic void common_hz_bil_8w_msa(const uint8_t *src, int32_t src_stride, 81cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 82cabdff1aSopenharmony_ci uint8_t height) 83cabdff1aSopenharmony_ci{ 84cabdff1aSopenharmony_ci uint8_t loop_cnt; 85cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1; 86cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 87cabdff1aSopenharmony_ci 88cabdff1aSopenharmony_ci for (loop_cnt = (height >> 2); loop_cnt--;) { 89cabdff1aSopenharmony_ci LD_SB4(src, src_stride, src0, src1, src2, src3); 90cabdff1aSopenharmony_ci src += (4 * src_stride); 91cabdff1aSopenharmony_ci 92cabdff1aSopenharmony_ci SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, 93cabdff1aSopenharmony_ci src0_sld1, src1_sld1, src2_sld1, src3_sld1); 94cabdff1aSopenharmony_ci AVER_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, 95cabdff1aSopenharmony_ci src2, src2_sld1, src3, src3_sld1, dst, dst_stride); 96cabdff1aSopenharmony_ci dst += (4 * dst_stride); 97cabdff1aSopenharmony_ci } 98cabdff1aSopenharmony_ci} 99cabdff1aSopenharmony_ci 100cabdff1aSopenharmony_cistatic void common_hz_bil_16w_msa(const uint8_t *src, int32_t src_stride, 101cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 102cabdff1aSopenharmony_ci uint8_t height) 103cabdff1aSopenharmony_ci{ 104cabdff1aSopenharmony_ci uint8_t loop_cnt; 105cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 106cabdff1aSopenharmony_ci v16u8 src8, src9, src10, src11, src12, src13, src14, src15; 107cabdff1aSopenharmony_ci 108cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 109cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 110cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 111cabdff1aSopenharmony_ci src8, src9, src10, src11, src12, src13, src14, src15); 112cabdff1aSopenharmony_ci src += (8 * src_stride); 113cabdff1aSopenharmony_ci 114cabdff1aSopenharmony_ci AVER_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11, 115cabdff1aSopenharmony_ci dst, dst_stride); 116cabdff1aSopenharmony_ci dst += (4 * dst_stride); 117cabdff1aSopenharmony_ci 118cabdff1aSopenharmony_ci AVER_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15, 119cabdff1aSopenharmony_ci dst, dst_stride); 120cabdff1aSopenharmony_ci dst += (4 * dst_stride); 121cabdff1aSopenharmony_ci } 122cabdff1aSopenharmony_ci} 123cabdff1aSopenharmony_ci 124cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride, 125cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 126cabdff1aSopenharmony_ci{ 127cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src3, src4, src5, src6, src7; 128cabdff1aSopenharmony_ci v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1; 129cabdff1aSopenharmony_ci v16i8 src4_sld1, src5_sld1, src6_sld1, src7_sld1; 130cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 131cabdff1aSopenharmony_ci 132cabdff1aSopenharmony_ci LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 133cabdff1aSopenharmony_ci src += (8 * src_stride); 134cabdff1aSopenharmony_ci 135cabdff1aSopenharmony_ci SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, 136cabdff1aSopenharmony_ci src0_sld1, src1_sld1, src2_sld1, src3_sld1); 137cabdff1aSopenharmony_ci SLDI_B4_SB(zeros, src4, zeros, src5, zeros, src6, zeros, src7, 1, 138cabdff1aSopenharmony_ci src4_sld1, src5_sld1, src6_sld1, src7_sld1); 139cabdff1aSopenharmony_ci 140cabdff1aSopenharmony_ci AVE_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, 141cabdff1aSopenharmony_ci src2, src2_sld1, src3, src3_sld1, dst, dst_stride); 142cabdff1aSopenharmony_ci dst += (4 * dst_stride); 143cabdff1aSopenharmony_ci AVE_ST8x4_UB(src4, src4_sld1, src5, src5_sld1, 144cabdff1aSopenharmony_ci src6, src6_sld1, src7, src7_sld1, dst, dst_stride); 145cabdff1aSopenharmony_ci} 146cabdff1aSopenharmony_ci 147cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride, 148cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 149cabdff1aSopenharmony_ci{ 150cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1; 151cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 152cabdff1aSopenharmony_ci 153cabdff1aSopenharmony_ci LD_SB4(src, src_stride, src0, src1, src2, src3); 154cabdff1aSopenharmony_ci SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, 155cabdff1aSopenharmony_ci src0_sld1, src1_sld1, src2_sld1, src3_sld1); 156cabdff1aSopenharmony_ci AVE_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, 157cabdff1aSopenharmony_ci src2, src2_sld1, src3, src3_sld1, dst, dst_stride); 158cabdff1aSopenharmony_ci} 159cabdff1aSopenharmony_ci 160cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_16x16_msa(const uint8_t *src, 161cabdff1aSopenharmony_ci int32_t src_stride, 162cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 163cabdff1aSopenharmony_ci{ 164cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 165cabdff1aSopenharmony_ci v16u8 src9, src10, src11, src12, src13, src14, src15; 166cabdff1aSopenharmony_ci 167cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 168cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 169cabdff1aSopenharmony_ci src8, src9, src10, src11, src12, src13, src14, src15); 170cabdff1aSopenharmony_ci src += (8 * src_stride); 171cabdff1aSopenharmony_ci 172cabdff1aSopenharmony_ci AVE_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11, 173cabdff1aSopenharmony_ci dst, dst_stride); 174cabdff1aSopenharmony_ci dst += (4 * dst_stride); 175cabdff1aSopenharmony_ci 176cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 177cabdff1aSopenharmony_ci LD_UB4((src + 1), src_stride, src8, src9, src10, src11); 178cabdff1aSopenharmony_ci src += (4 * src_stride); 179cabdff1aSopenharmony_ci 180cabdff1aSopenharmony_ci AVE_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15, 181cabdff1aSopenharmony_ci dst, dst_stride); 182cabdff1aSopenharmony_ci dst += (4 * dst_stride); 183cabdff1aSopenharmony_ci 184cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src4, src5, src6, src7); 185cabdff1aSopenharmony_ci LD_UB4((src + 1), src_stride, src12, src13, src14, src15); 186cabdff1aSopenharmony_ci src += (4 * src_stride); 187cabdff1aSopenharmony_ci 188cabdff1aSopenharmony_ci AVE_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11, 189cabdff1aSopenharmony_ci dst, dst_stride); 190cabdff1aSopenharmony_ci dst += (4 * dst_stride); 191cabdff1aSopenharmony_ci AVE_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15, 192cabdff1aSopenharmony_ci dst, dst_stride); 193cabdff1aSopenharmony_ci} 194cabdff1aSopenharmony_ci 195cabdff1aSopenharmony_cistatic void common_hz_bil_no_rnd_8x16_msa(const uint8_t *src, 196cabdff1aSopenharmony_ci int32_t src_stride, 197cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 198cabdff1aSopenharmony_ci{ 199cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 200cabdff1aSopenharmony_ci v16u8 src9, src10, src11, src12, src13, src14, src15; 201cabdff1aSopenharmony_ci 202cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 203cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 204cabdff1aSopenharmony_ci src8, src9, src10, src11, src12, src13, src14, src15); 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci AVE_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11, 207cabdff1aSopenharmony_ci dst, dst_stride); 208cabdff1aSopenharmony_ci dst += (4 * dst_stride); 209cabdff1aSopenharmony_ci AVE_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15, 210cabdff1aSopenharmony_ci dst, dst_stride); 211cabdff1aSopenharmony_ci} 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_cistatic void common_hz_bil_and_aver_dst_4w_msa(const uint8_t *src, 214cabdff1aSopenharmony_ci int32_t src_stride, 215cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 216cabdff1aSopenharmony_ci uint8_t height) 217cabdff1aSopenharmony_ci{ 218cabdff1aSopenharmony_ci uint8_t loop_cnt; 219cabdff1aSopenharmony_ci uint32_t dst0, dst1, out0, out1; 220cabdff1aSopenharmony_ci v16u8 src0, src1, src0_sld1, src1_sld1, res0, res1; 221cabdff1aSopenharmony_ci v16u8 tmp0 = { 0 }; 222cabdff1aSopenharmony_ci v16u8 tmp1 = { 0 }; 223cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 224cabdff1aSopenharmony_ci 225cabdff1aSopenharmony_ci for (loop_cnt = (height >> 1); loop_cnt--;) { 226cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src0, src1); 227cabdff1aSopenharmony_ci src += (2 * src_stride); 228cabdff1aSopenharmony_ci 229cabdff1aSopenharmony_ci SLDI_B2_UB(zeros, src0, zeros, src1, 1, src0_sld1, src1_sld1); 230cabdff1aSopenharmony_ci 231cabdff1aSopenharmony_ci dst0 = LW(dst); 232cabdff1aSopenharmony_ci dst1 = LW(dst + dst_stride); 233cabdff1aSopenharmony_ci tmp0 = (v16u8) __msa_insert_w((v4i32) tmp0, 0, dst0); 234cabdff1aSopenharmony_ci tmp1 = (v16u8) __msa_insert_w((v4i32) tmp1, 0, dst1); 235cabdff1aSopenharmony_ci 236cabdff1aSopenharmony_ci AVER_UB2_UB(src0_sld1, src0, src1_sld1, src1, res0, res1); 237cabdff1aSopenharmony_ci AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1); 238cabdff1aSopenharmony_ci 239cabdff1aSopenharmony_ci out0 = __msa_copy_u_w((v4i32) res0, 0); 240cabdff1aSopenharmony_ci out1 = __msa_copy_u_w((v4i32) res1, 0); 241cabdff1aSopenharmony_ci SW(out0, dst); 242cabdff1aSopenharmony_ci dst += dst_stride; 243cabdff1aSopenharmony_ci SW(out1, dst); 244cabdff1aSopenharmony_ci dst += dst_stride; 245cabdff1aSopenharmony_ci } 246cabdff1aSopenharmony_ci} 247cabdff1aSopenharmony_ci 248cabdff1aSopenharmony_cistatic void common_hz_bil_and_aver_dst_8w_msa(const uint8_t *src, 249cabdff1aSopenharmony_ci int32_t src_stride, 250cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 251cabdff1aSopenharmony_ci uint8_t height) 252cabdff1aSopenharmony_ci{ 253cabdff1aSopenharmony_ci uint8_t loop_cnt; 254cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1; 255cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 256cabdff1aSopenharmony_ci 257cabdff1aSopenharmony_ci for (loop_cnt = (height >> 2); loop_cnt--;) { 258cabdff1aSopenharmony_ci LD_SB4(src, src_stride, src0, src1, src2, src3); 259cabdff1aSopenharmony_ci src += (4 * src_stride); 260cabdff1aSopenharmony_ci 261cabdff1aSopenharmony_ci SLDI_B4_SB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, 262cabdff1aSopenharmony_ci src0_sld1, src1_sld1, src2_sld1, src3_sld1); 263cabdff1aSopenharmony_ci 264cabdff1aSopenharmony_ci AVER_DST_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, src2, src2_sld1, 265cabdff1aSopenharmony_ci src3, src3_sld1, dst, dst_stride); 266cabdff1aSopenharmony_ci dst += (4 * dst_stride); 267cabdff1aSopenharmony_ci } 268cabdff1aSopenharmony_ci} 269cabdff1aSopenharmony_ci 270cabdff1aSopenharmony_cistatic void common_hz_bil_and_aver_dst_16w_msa(const uint8_t *src, 271cabdff1aSopenharmony_ci int32_t src_stride, 272cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 273cabdff1aSopenharmony_ci uint8_t height) 274cabdff1aSopenharmony_ci{ 275cabdff1aSopenharmony_ci uint8_t loop_cnt; 276cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 277cabdff1aSopenharmony_ci v16u8 src9, src10, src11, src12, src13, src14, src15; 278cabdff1aSopenharmony_ci 279cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 280cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 281cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 282cabdff1aSopenharmony_ci src8, src9, src10, src11, src12, src13, src14, src15); 283cabdff1aSopenharmony_ci src += (8 * src_stride); 284cabdff1aSopenharmony_ci 285cabdff1aSopenharmony_ci AVER_DST_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11, 286cabdff1aSopenharmony_ci dst, dst_stride); 287cabdff1aSopenharmony_ci dst += (4 * dst_stride); 288cabdff1aSopenharmony_ci AVER_DST_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15, 289cabdff1aSopenharmony_ci dst, dst_stride); 290cabdff1aSopenharmony_ci dst += (4 * dst_stride); 291cabdff1aSopenharmony_ci } 292cabdff1aSopenharmony_ci} 293cabdff1aSopenharmony_ci 294cabdff1aSopenharmony_cistatic void common_vt_bil_4w_msa(const uint8_t *src, int32_t src_stride, 295cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 296cabdff1aSopenharmony_ci uint8_t height) 297cabdff1aSopenharmony_ci{ 298cabdff1aSopenharmony_ci uint8_t loop_cnt; 299cabdff1aSopenharmony_ci uint32_t out0, out1; 300cabdff1aSopenharmony_ci v16u8 src0, src1, src2, res0, res1; 301cabdff1aSopenharmony_ci 302cabdff1aSopenharmony_ci src0 = LD_UB(src); 303cabdff1aSopenharmony_ci src += src_stride; 304cabdff1aSopenharmony_ci 305cabdff1aSopenharmony_ci for (loop_cnt = (height >> 1); loop_cnt--;) { 306cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src1, src2); 307cabdff1aSopenharmony_ci src += (2 * src_stride); 308cabdff1aSopenharmony_ci 309cabdff1aSopenharmony_ci AVER_UB2_UB(src0, src1, src1, src2, res0, res1); 310cabdff1aSopenharmony_ci 311cabdff1aSopenharmony_ci out0 = __msa_copy_u_w((v4i32) res0, 0); 312cabdff1aSopenharmony_ci out1 = __msa_copy_u_w((v4i32) res1, 0); 313cabdff1aSopenharmony_ci SW(out0, dst); 314cabdff1aSopenharmony_ci dst += dst_stride; 315cabdff1aSopenharmony_ci SW(out1, dst); 316cabdff1aSopenharmony_ci dst += dst_stride; 317cabdff1aSopenharmony_ci 318cabdff1aSopenharmony_ci src0 = src2; 319cabdff1aSopenharmony_ci } 320cabdff1aSopenharmony_ci} 321cabdff1aSopenharmony_ci 322cabdff1aSopenharmony_cistatic void common_vt_bil_8w_msa(const uint8_t *src, int32_t src_stride, 323cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 324cabdff1aSopenharmony_ci uint8_t height) 325cabdff1aSopenharmony_ci{ 326cabdff1aSopenharmony_ci uint8_t loop_cnt; 327cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4; 328cabdff1aSopenharmony_ci 329cabdff1aSopenharmony_ci src0 = LD_UB(src); 330cabdff1aSopenharmony_ci src += src_stride; 331cabdff1aSopenharmony_ci 332cabdff1aSopenharmony_ci for (loop_cnt = (height >> 2); loop_cnt--;) { 333cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src1, src2, src3, src4); 334cabdff1aSopenharmony_ci src += (4 * src_stride); 335cabdff1aSopenharmony_ci 336cabdff1aSopenharmony_ci AVER_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 337cabdff1aSopenharmony_ci dst, dst_stride); 338cabdff1aSopenharmony_ci dst += (4 * dst_stride); 339cabdff1aSopenharmony_ci 340cabdff1aSopenharmony_ci src0 = src4; 341cabdff1aSopenharmony_ci } 342cabdff1aSopenharmony_ci} 343cabdff1aSopenharmony_ci 344cabdff1aSopenharmony_cistatic void common_vt_bil_16w_msa(const uint8_t *src, int32_t src_stride, 345cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 346cabdff1aSopenharmony_ci uint8_t height) 347cabdff1aSopenharmony_ci{ 348cabdff1aSopenharmony_ci uint8_t loop_cnt; 349cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 350cabdff1aSopenharmony_ci 351cabdff1aSopenharmony_ci src0 = LD_UB(src); 352cabdff1aSopenharmony_ci src += src_stride; 353cabdff1aSopenharmony_ci 354cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 355cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); 356cabdff1aSopenharmony_ci src += (8 * src_stride); 357cabdff1aSopenharmony_ci 358cabdff1aSopenharmony_ci AVER_ST16x4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 359cabdff1aSopenharmony_ci dst, dst_stride); 360cabdff1aSopenharmony_ci dst += (4 * dst_stride); 361cabdff1aSopenharmony_ci AVER_ST16x4_UB(src4, src5, src5, src6, src6, src7, src7, src8, 362cabdff1aSopenharmony_ci dst, dst_stride); 363cabdff1aSopenharmony_ci dst += (4 * dst_stride); 364cabdff1aSopenharmony_ci 365cabdff1aSopenharmony_ci src0 = src8; 366cabdff1aSopenharmony_ci } 367cabdff1aSopenharmony_ci} 368cabdff1aSopenharmony_ci 369cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride, 370cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 371cabdff1aSopenharmony_ci{ 372cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 373cabdff1aSopenharmony_ci 374cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 375cabdff1aSopenharmony_ci src += (8 * src_stride); 376cabdff1aSopenharmony_ci src8 = LD_UB(src); 377cabdff1aSopenharmony_ci 378cabdff1aSopenharmony_ci AVE_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 379cabdff1aSopenharmony_ci dst, dst_stride); 380cabdff1aSopenharmony_ci dst += (4 * dst_stride); 381cabdff1aSopenharmony_ci 382cabdff1aSopenharmony_ci AVE_ST8x4_UB(src4, src5, src5, src6, src6, src7, src7, src8, 383cabdff1aSopenharmony_ci dst, dst_stride); 384cabdff1aSopenharmony_ci} 385cabdff1aSopenharmony_ci 386cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride, 387cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 388cabdff1aSopenharmony_ci{ 389cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4; 390cabdff1aSopenharmony_ci 391cabdff1aSopenharmony_ci LD_UB5(src, src_stride, src0, src1, src2, src3, src4); 392cabdff1aSopenharmony_ci AVE_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 393cabdff1aSopenharmony_ci dst, dst_stride); 394cabdff1aSopenharmony_ci} 395cabdff1aSopenharmony_ci 396cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_16x16_msa(const uint8_t *src, 397cabdff1aSopenharmony_ci int32_t src_stride, 398cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 399cabdff1aSopenharmony_ci{ 400cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 401cabdff1aSopenharmony_ci v16u8 src9, src10, src11, src12, src13, src14, src15, src16; 402cabdff1aSopenharmony_ci 403cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 404cabdff1aSopenharmony_ci src += (8 * src_stride); 405cabdff1aSopenharmony_ci LD_UB8(src, src_stride, 406cabdff1aSopenharmony_ci src8, src9, src10, src11, src12, src13, src14, src15); 407cabdff1aSopenharmony_ci src += (8 * src_stride); 408cabdff1aSopenharmony_ci src16 = LD_UB(src); 409cabdff1aSopenharmony_ci 410cabdff1aSopenharmony_ci AVE_ST16x4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 411cabdff1aSopenharmony_ci dst, dst_stride); 412cabdff1aSopenharmony_ci dst += (4 * dst_stride); 413cabdff1aSopenharmony_ci AVE_ST16x4_UB(src4, src5, src5, src6, src6, src7, src7, src8, 414cabdff1aSopenharmony_ci dst, dst_stride); 415cabdff1aSopenharmony_ci dst += (4 * dst_stride); 416cabdff1aSopenharmony_ci AVE_ST16x4_UB(src8, src9, src9, src10, src10, src11, src11, src12, 417cabdff1aSopenharmony_ci dst, dst_stride); 418cabdff1aSopenharmony_ci dst += (4 * dst_stride); 419cabdff1aSopenharmony_ci AVE_ST16x4_UB(src12, src13, src13, src14, 420cabdff1aSopenharmony_ci src14, src15, src15, src16, dst, dst_stride); 421cabdff1aSopenharmony_ci} 422cabdff1aSopenharmony_ci 423cabdff1aSopenharmony_cistatic void common_vt_bil_no_rnd_8x16_msa(const uint8_t *src, 424cabdff1aSopenharmony_ci int32_t src_stride, 425cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 426cabdff1aSopenharmony_ci{ 427cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 428cabdff1aSopenharmony_ci 429cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 430cabdff1aSopenharmony_ci src += (8 * src_stride); 431cabdff1aSopenharmony_ci src8 = LD_UB(src); 432cabdff1aSopenharmony_ci 433cabdff1aSopenharmony_ci AVE_ST16x4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 434cabdff1aSopenharmony_ci dst, dst_stride); 435cabdff1aSopenharmony_ci dst += (4 * dst_stride); 436cabdff1aSopenharmony_ci AVE_ST16x4_UB(src4, src5, src5, src6, src6, src7, src7, src8, 437cabdff1aSopenharmony_ci dst, dst_stride); 438cabdff1aSopenharmony_ci} 439cabdff1aSopenharmony_ci 440cabdff1aSopenharmony_cistatic void common_vt_bil_and_aver_dst_4w_msa(const uint8_t *src, 441cabdff1aSopenharmony_ci int32_t src_stride, 442cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 443cabdff1aSopenharmony_ci uint8_t height) 444cabdff1aSopenharmony_ci{ 445cabdff1aSopenharmony_ci uint8_t loop_cnt; 446cabdff1aSopenharmony_ci uint32_t out0, out1, dst0, dst1; 447cabdff1aSopenharmony_ci v16u8 src0, src1, src2; 448cabdff1aSopenharmony_ci v16u8 tmp0 = { 0 }; 449cabdff1aSopenharmony_ci v16u8 tmp1 = { 0 }; 450cabdff1aSopenharmony_ci v16u8 res0, res1; 451cabdff1aSopenharmony_ci 452cabdff1aSopenharmony_ci src0 = LD_UB(src); 453cabdff1aSopenharmony_ci src += src_stride; 454cabdff1aSopenharmony_ci 455cabdff1aSopenharmony_ci for (loop_cnt = (height >> 1); loop_cnt--;) { 456cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src1, src2); 457cabdff1aSopenharmony_ci src += (2 * src_stride); 458cabdff1aSopenharmony_ci dst0 = LW(dst); 459cabdff1aSopenharmony_ci dst1 = LW(dst + dst_stride); 460cabdff1aSopenharmony_ci tmp0 = (v16u8) __msa_insert_w((v4i32) tmp0, 0, dst0); 461cabdff1aSopenharmony_ci tmp1 = (v16u8) __msa_insert_w((v4i32) tmp1, 0, dst1); 462cabdff1aSopenharmony_ci AVER_UB2_UB(src0, src1, src1, src2, res0, res1); 463cabdff1aSopenharmony_ci AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1); 464cabdff1aSopenharmony_ci out0 = __msa_copy_u_w((v4i32) res0, 0); 465cabdff1aSopenharmony_ci out1 = __msa_copy_u_w((v4i32) res1, 0); 466cabdff1aSopenharmony_ci SW(out0, dst); 467cabdff1aSopenharmony_ci dst += dst_stride; 468cabdff1aSopenharmony_ci SW(out1, dst); 469cabdff1aSopenharmony_ci dst += dst_stride; 470cabdff1aSopenharmony_ci src0 = src2; 471cabdff1aSopenharmony_ci } 472cabdff1aSopenharmony_ci} 473cabdff1aSopenharmony_ci 474cabdff1aSopenharmony_cistatic void common_vt_bil_and_aver_dst_8w_msa(const uint8_t *src, 475cabdff1aSopenharmony_ci int32_t src_stride, 476cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 477cabdff1aSopenharmony_ci uint8_t height) 478cabdff1aSopenharmony_ci{ 479cabdff1aSopenharmony_ci uint8_t loop_cnt; 480cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4; 481cabdff1aSopenharmony_ci 482cabdff1aSopenharmony_ci src0 = LD_UB(src); 483cabdff1aSopenharmony_ci src += src_stride; 484cabdff1aSopenharmony_ci 485cabdff1aSopenharmony_ci for (loop_cnt = (height >> 2); loop_cnt--;) { 486cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src1, src2, src3, src4); 487cabdff1aSopenharmony_ci src += (4 * src_stride); 488cabdff1aSopenharmony_ci 489cabdff1aSopenharmony_ci AVER_DST_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 490cabdff1aSopenharmony_ci dst, dst_stride); 491cabdff1aSopenharmony_ci dst += (4 * dst_stride); 492cabdff1aSopenharmony_ci src0 = src4; 493cabdff1aSopenharmony_ci } 494cabdff1aSopenharmony_ci} 495cabdff1aSopenharmony_ci 496cabdff1aSopenharmony_cistatic void common_vt_bil_and_aver_dst_16w_msa(const uint8_t *src, 497cabdff1aSopenharmony_ci int32_t src_stride, 498cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 499cabdff1aSopenharmony_ci uint8_t height) 500cabdff1aSopenharmony_ci{ 501cabdff1aSopenharmony_ci uint8_t loop_cnt; 502cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 503cabdff1aSopenharmony_ci v16u8 res0, res1, res2, res3, res4, res5, res6, res7; 504cabdff1aSopenharmony_ci v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; 505cabdff1aSopenharmony_ci 506cabdff1aSopenharmony_ci src0 = LD_UB(src); 507cabdff1aSopenharmony_ci src += src_stride; 508cabdff1aSopenharmony_ci 509cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 510cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); 511cabdff1aSopenharmony_ci src += (8 * src_stride); 512cabdff1aSopenharmony_ci AVER_UB4_UB(src0, src1, src1, src2, src2, src3, src3, src4, 513cabdff1aSopenharmony_ci res0, res1, res2, res3); 514cabdff1aSopenharmony_ci AVER_UB4_UB(src4, src5, src5, src6, src6, src7, src7, src8, 515cabdff1aSopenharmony_ci res4, res5, res6, res7); 516cabdff1aSopenharmony_ci 517cabdff1aSopenharmony_ci LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); 518cabdff1aSopenharmony_ci AVER_UB4_UB(dst0, res0, dst1, res1, dst2, res2, dst3, res3, 519cabdff1aSopenharmony_ci res0, res1, res2, res3); 520cabdff1aSopenharmony_ci AVER_UB4_UB(dst4, res4, dst5, res5, dst6, res6, dst7, res7, 521cabdff1aSopenharmony_ci res4, res5, res6, res7); 522cabdff1aSopenharmony_ci ST_UB8(res0, res1, res2, res3, res4, res5, res6, res7, dst, dst_stride); 523cabdff1aSopenharmony_ci dst += (8 * dst_stride); 524cabdff1aSopenharmony_ci 525cabdff1aSopenharmony_ci src0 = src8; 526cabdff1aSopenharmony_ci } 527cabdff1aSopenharmony_ci} 528cabdff1aSopenharmony_ci 529cabdff1aSopenharmony_cistatic void common_hv_bil_4w_msa(const uint8_t *src, int32_t src_stride, 530cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 531cabdff1aSopenharmony_ci uint8_t height) 532cabdff1aSopenharmony_ci{ 533cabdff1aSopenharmony_ci uint8_t loop_cnt; 534cabdff1aSopenharmony_ci uint32_t res0, res1; 535cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src0_sld1, src1_sld1, src2_sld1; 536cabdff1aSopenharmony_ci v16u8 src0_r, src1_r, src2_r, res; 537cabdff1aSopenharmony_ci v8u16 add0, add1, add2, sum0, sum1; 538cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 539cabdff1aSopenharmony_ci 540cabdff1aSopenharmony_ci src0 = LD_SB(src); 541cabdff1aSopenharmony_ci src += src_stride; 542cabdff1aSopenharmony_ci 543cabdff1aSopenharmony_ci for (loop_cnt = (height >> 1); loop_cnt--;) { 544cabdff1aSopenharmony_ci LD_SB2(src, src_stride, src1, src2); 545cabdff1aSopenharmony_ci src += (2 * src_stride); 546cabdff1aSopenharmony_ci 547cabdff1aSopenharmony_ci SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, 548cabdff1aSopenharmony_ci src1_sld1, src2_sld1); 549cabdff1aSopenharmony_ci ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, 550cabdff1aSopenharmony_ci src0_r, src1_r, src2_r); 551cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); 552cabdff1aSopenharmony_ci ADD2(add0, add1, add1, add2, sum0, sum1); 553cabdff1aSopenharmony_ci SRARI_H2_UH(sum0, sum1, 2); 554cabdff1aSopenharmony_ci res = (v16u8) __msa_pckev_b((v16i8) sum1, (v16i8) sum0); 555cabdff1aSopenharmony_ci res0 = __msa_copy_u_w((v4i32) res, 0); 556cabdff1aSopenharmony_ci res1 = __msa_copy_u_w((v4i32) res, 2); 557cabdff1aSopenharmony_ci SW(res0, dst); 558cabdff1aSopenharmony_ci dst += dst_stride; 559cabdff1aSopenharmony_ci SW(res1, dst); 560cabdff1aSopenharmony_ci dst += dst_stride; 561cabdff1aSopenharmony_ci 562cabdff1aSopenharmony_ci src0 = src2; 563cabdff1aSopenharmony_ci } 564cabdff1aSopenharmony_ci} 565cabdff1aSopenharmony_ci 566cabdff1aSopenharmony_cistatic void common_hv_bil_8w_msa(const uint8_t *src, int32_t src_stride, 567cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 568cabdff1aSopenharmony_ci uint8_t height) 569cabdff1aSopenharmony_ci{ 570cabdff1aSopenharmony_ci uint8_t loop_cnt; 571cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src3, src4; 572cabdff1aSopenharmony_ci v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1, src4_sld1; 573cabdff1aSopenharmony_ci v16u8 src0_r, src1_r, src2_r, src3_r, src4_r; 574cabdff1aSopenharmony_ci v8u16 add0, add1, add2, add3, add4; 575cabdff1aSopenharmony_ci v8u16 sum0, sum1, sum2, sum3; 576cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 577cabdff1aSopenharmony_ci 578cabdff1aSopenharmony_ci src0 = LD_SB(src); 579cabdff1aSopenharmony_ci src += src_stride; 580cabdff1aSopenharmony_ci 581cabdff1aSopenharmony_ci for (loop_cnt = (height >> 2); loop_cnt--;) { 582cabdff1aSopenharmony_ci LD_SB4(src, src_stride, src1, src2, src3, src4); 583cabdff1aSopenharmony_ci src += (4 * src_stride); 584cabdff1aSopenharmony_ci 585cabdff1aSopenharmony_ci SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, 586cabdff1aSopenharmony_ci src1_sld1, src2_sld1); 587cabdff1aSopenharmony_ci SLDI_B2_SB(zeros, src3, zeros, src4, 1, src3_sld1, src4_sld1); 588cabdff1aSopenharmony_ci ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, 589cabdff1aSopenharmony_ci src1_r, src2_r); 590cabdff1aSopenharmony_ci ILVR_B2_UB(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r); 591cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); 592cabdff1aSopenharmony_ci HADD_UB2_UH(src3_r, src4_r, add3, add4); 593cabdff1aSopenharmony_ci ADD4(add0, add1, add1, add2, add2, add3, add3, add4, 594cabdff1aSopenharmony_ci sum0, sum1, sum2, sum3); 595cabdff1aSopenharmony_ci SRARI_H4_UH(sum0, sum1, sum2, sum3, 2); 596cabdff1aSopenharmony_ci PCKEV_B2_SB(sum1, sum0, sum3, sum2, src0, src1); 597cabdff1aSopenharmony_ci ST_D4(src0, src1, 0, 1, 0, 1, dst, dst_stride); 598cabdff1aSopenharmony_ci dst += (4 * dst_stride); 599cabdff1aSopenharmony_ci src0 = src4; 600cabdff1aSopenharmony_ci } 601cabdff1aSopenharmony_ci} 602cabdff1aSopenharmony_ci 603cabdff1aSopenharmony_cistatic void common_hv_bil_16w_msa(const uint8_t *src, int32_t src_stride, 604cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 605cabdff1aSopenharmony_ci uint8_t height) 606cabdff1aSopenharmony_ci{ 607cabdff1aSopenharmony_ci uint8_t loop_cnt; 608cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9; 609cabdff1aSopenharmony_ci v16u8 src10, src11, src12, src13, src14, src15, src16, src17; 610cabdff1aSopenharmony_ci v8u16 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r; 611cabdff1aSopenharmony_ci v8u16 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l; 612cabdff1aSopenharmony_ci v8u16 src7_l, src8_l; 613cabdff1aSopenharmony_ci v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r; 614cabdff1aSopenharmony_ci v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l; 615cabdff1aSopenharmony_ci 616cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 617cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 618cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 619cabdff1aSopenharmony_ci src9, src10, src11, src12, src13, src14, src15, src16); 620cabdff1aSopenharmony_ci src += (8 * src_stride); 621cabdff1aSopenharmony_ci 622cabdff1aSopenharmony_ci src8 = LD_UB(src); 623cabdff1aSopenharmony_ci src17 = LD_UB(src + 1); 624cabdff1aSopenharmony_ci 625cabdff1aSopenharmony_ci ILVRL_B2_UH(src9, src0, src0_r, src0_l); 626cabdff1aSopenharmony_ci ILVRL_B2_UH(src10, src1, src1_r, src1_l); 627cabdff1aSopenharmony_ci ILVRL_B2_UH(src11, src2, src2_r, src2_l); 628cabdff1aSopenharmony_ci ILVRL_B2_UH(src12, src3, src3_r, src3_l); 629cabdff1aSopenharmony_ci ILVRL_B2_UH(src13, src4, src4_r, src4_l); 630cabdff1aSopenharmony_ci ILVRL_B2_UH(src14, src5, src5_r, src5_l); 631cabdff1aSopenharmony_ci ILVRL_B2_UH(src15, src6, src6_r, src6_l); 632cabdff1aSopenharmony_ci ILVRL_B2_UH(src16, src7, src7_r, src7_l); 633cabdff1aSopenharmony_ci ILVRL_B2_UH(src17, src8, src8_r, src8_l); 634cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r); 635cabdff1aSopenharmony_ci HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r); 636cabdff1aSopenharmony_ci HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r); 637cabdff1aSopenharmony_ci HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l); 638cabdff1aSopenharmony_ci HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l); 639cabdff1aSopenharmony_ci HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l); 640cabdff1aSopenharmony_ci ADD4(src0_r, src1_r, src1_r, src2_r, src2_r, src3_r, src3_r, src4_r, 641cabdff1aSopenharmony_ci sum0_r, sum1_r, sum2_r, sum3_r); 642cabdff1aSopenharmony_ci ADD4(src4_r, src5_r, src5_r, src6_r, src6_r, src7_r, src7_r, src8_r, 643cabdff1aSopenharmony_ci sum4_r, sum5_r, sum6_r, sum7_r); 644cabdff1aSopenharmony_ci ADD4(src0_l, src1_l, src1_l, src2_l, src2_l, src3_l, src3_l, src4_l, 645cabdff1aSopenharmony_ci sum0_l, sum1_l, sum2_l, sum3_l); 646cabdff1aSopenharmony_ci ADD4(src4_l, src5_l, src5_l, src6_l, src6_l, src7_l, src7_l, src8_l, 647cabdff1aSopenharmony_ci sum4_l, sum5_l, sum6_l, sum7_l); 648cabdff1aSopenharmony_ci SRARI_H4_UH(sum0_r, sum1_r, sum2_r, sum3_r, 2); 649cabdff1aSopenharmony_ci SRARI_H4_UH(sum4_r, sum5_r, sum6_r, sum7_r, 2); 650cabdff1aSopenharmony_ci SRARI_H4_UH(sum0_l, sum1_l, sum2_l, sum3_l, 2); 651cabdff1aSopenharmony_ci SRARI_H4_UH(sum4_l, sum5_l, sum6_l, sum7_l, 2); 652cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, sum2_l, sum2_r, 653cabdff1aSopenharmony_ci sum3_l, sum3_r, dst, dst_stride); 654cabdff1aSopenharmony_ci dst += (4 * dst_stride); 655cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, sum6_l, sum6_r, 656cabdff1aSopenharmony_ci sum7_l, sum7_r, dst, dst_stride); 657cabdff1aSopenharmony_ci dst += (4 * dst_stride); 658cabdff1aSopenharmony_ci } 659cabdff1aSopenharmony_ci} 660cabdff1aSopenharmony_ci 661cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride, 662cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 663cabdff1aSopenharmony_ci{ 664cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 665cabdff1aSopenharmony_ci v16u8 src0_sld1, src1_sld1, src2_sld1, src3_sld1; 666cabdff1aSopenharmony_ci v16u8 src4_sld1, src5_sld1, src6_sld1, src7_sld1, src8_sld1; 667cabdff1aSopenharmony_ci v8u16 src0_r, src1_r, src2_r, src3_r; 668cabdff1aSopenharmony_ci v8u16 src4_r, src5_r, src6_r, src7_r, src8_r; 669cabdff1aSopenharmony_ci v8u16 add0, add1, add2, add3, add4, add5, add6, add7, add8; 670cabdff1aSopenharmony_ci v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 671cabdff1aSopenharmony_ci v16i8 out0, out1; 672cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 673cabdff1aSopenharmony_ci 674cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 675cabdff1aSopenharmony_ci src += (8 * src_stride); 676cabdff1aSopenharmony_ci src8 = LD_UB(src); 677cabdff1aSopenharmony_ci 678cabdff1aSopenharmony_ci SLDI_B4_UB(zeros, src0, zeros, src1, zeros, src2, zeros, src3, 1, 679cabdff1aSopenharmony_ci src0_sld1, src1_sld1, src2_sld1, src3_sld1); 680cabdff1aSopenharmony_ci SLDI_B3_UB(zeros, src4, zeros, src5, zeros, src6, 1, src4_sld1, 681cabdff1aSopenharmony_ci src5_sld1, src6_sld1); 682cabdff1aSopenharmony_ci SLDI_B2_UB(zeros, src7, zeros, src8, 1, src7_sld1, src8_sld1); 683cabdff1aSopenharmony_ci ILVR_B4_UH(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src3_sld1, 684cabdff1aSopenharmony_ci src3, src0_r, src1_r, src2_r, src3_r); 685cabdff1aSopenharmony_ci ILVR_B3_UH(src4_sld1, src4, src5_sld1, src5, src6_sld1, src6, src4_r, 686cabdff1aSopenharmony_ci src5_r, src6_r); 687cabdff1aSopenharmony_ci ILVR_B2_UH(src7_sld1, src7, src8_sld1, src8, src7_r, src8_r); 688cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); 689cabdff1aSopenharmony_ci HADD_UB3_UH(src3_r, src4_r, src5_r, add3, add4, add5); 690cabdff1aSopenharmony_ci HADD_UB3_UH(src6_r, src7_r, src8_r, add6, add7, add8); 691cabdff1aSopenharmony_ci 692cabdff1aSopenharmony_ci sum0 = add0 + add1 + 1; 693cabdff1aSopenharmony_ci sum1 = add1 + add2 + 1; 694cabdff1aSopenharmony_ci sum2 = add2 + add3 + 1; 695cabdff1aSopenharmony_ci sum3 = add3 + add4 + 1; 696cabdff1aSopenharmony_ci sum4 = add4 + add5 + 1; 697cabdff1aSopenharmony_ci sum5 = add5 + add6 + 1; 698cabdff1aSopenharmony_ci sum6 = add6 + add7 + 1; 699cabdff1aSopenharmony_ci sum7 = add7 + add8 + 1; 700cabdff1aSopenharmony_ci 701cabdff1aSopenharmony_ci SRA_4V(sum0, sum1, sum2, sum3, 2); 702cabdff1aSopenharmony_ci SRA_4V(sum4, sum5, sum6, sum7, 2); 703cabdff1aSopenharmony_ci PCKEV_B2_SB(sum1, sum0, sum3, sum2, out0, out1); 704cabdff1aSopenharmony_ci ST_D4(out0, out1, 0, 1, 0, 1, dst, dst_stride); 705cabdff1aSopenharmony_ci PCKEV_B2_SB(sum5, sum4, sum7, sum6, out0, out1); 706cabdff1aSopenharmony_ci ST_D4(out0, out1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride); 707cabdff1aSopenharmony_ci} 708cabdff1aSopenharmony_ci 709cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride, 710cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 711cabdff1aSopenharmony_ci{ 712cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src3, src4; 713cabdff1aSopenharmony_ci v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1, src4_sld1; 714cabdff1aSopenharmony_ci v8u16 src0_r, src1_r, src2_r, src3_r, src4_r; 715cabdff1aSopenharmony_ci v8u16 add0, add1, add2, add3, add4; 716cabdff1aSopenharmony_ci v8u16 sum0, sum1, sum2, sum3; 717cabdff1aSopenharmony_ci v16i8 out0, out1; 718cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 719cabdff1aSopenharmony_ci 720cabdff1aSopenharmony_ci LD_SB4(src, src_stride, src0, src1, src2, src3); 721cabdff1aSopenharmony_ci src += (4 * src_stride); 722cabdff1aSopenharmony_ci src4 = LD_SB(src); 723cabdff1aSopenharmony_ci 724cabdff1aSopenharmony_ci SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, 725cabdff1aSopenharmony_ci src1_sld1, src2_sld1); 726cabdff1aSopenharmony_ci SLDI_B2_SB(zeros, src3, zeros, src4, 1, src3_sld1, src4_sld1); 727cabdff1aSopenharmony_ci ILVR_B3_UH(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, 728cabdff1aSopenharmony_ci src1_r, src2_r); 729cabdff1aSopenharmony_ci ILVR_B2_UH(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r); 730cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); 731cabdff1aSopenharmony_ci HADD_UB2_UH(src3_r, src4_r, add3, add4); 732cabdff1aSopenharmony_ci 733cabdff1aSopenharmony_ci sum0 = add0 + add1 + 1; 734cabdff1aSopenharmony_ci sum1 = add1 + add2 + 1; 735cabdff1aSopenharmony_ci sum2 = add2 + add3 + 1; 736cabdff1aSopenharmony_ci sum3 = add3 + add4 + 1; 737cabdff1aSopenharmony_ci 738cabdff1aSopenharmony_ci SRA_4V(sum0, sum1, sum2, sum3, 2); 739cabdff1aSopenharmony_ci PCKEV_B2_SB(sum1, sum0, sum3, sum2, out0, out1); 740cabdff1aSopenharmony_ci ST_D4(out0, out1, 0, 1, 0, 1, dst, dst_stride); 741cabdff1aSopenharmony_ci} 742cabdff1aSopenharmony_ci 743cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_16x16_msa(const uint8_t *src, 744cabdff1aSopenharmony_ci int32_t src_stride, 745cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 746cabdff1aSopenharmony_ci{ 747cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9; 748cabdff1aSopenharmony_ci v16u8 src10, src11, src12, src13, src14, src15, src16, src17; 749cabdff1aSopenharmony_ci v8u16 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r; 750cabdff1aSopenharmony_ci v8u16 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l; 751cabdff1aSopenharmony_ci v8u16 src7_l, src8_l; 752cabdff1aSopenharmony_ci v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r; 753cabdff1aSopenharmony_ci v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l; 754cabdff1aSopenharmony_ci 755cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 756cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 757cabdff1aSopenharmony_ci src9, src10, src11, src12, src13, src14, src15, src16); 758cabdff1aSopenharmony_ci src += (8 * src_stride); 759cabdff1aSopenharmony_ci src8 = LD_UB(src); 760cabdff1aSopenharmony_ci src17 = LD_UB(src + 1); 761cabdff1aSopenharmony_ci 762cabdff1aSopenharmony_ci ILVRL_B2_UH(src9, src0, src0_r, src0_l); 763cabdff1aSopenharmony_ci ILVRL_B2_UH(src10, src1, src1_r, src1_l); 764cabdff1aSopenharmony_ci ILVRL_B2_UH(src11, src2, src2_r, src2_l); 765cabdff1aSopenharmony_ci ILVRL_B2_UH(src12, src3, src3_r, src3_l); 766cabdff1aSopenharmony_ci ILVRL_B2_UH(src13, src4, src4_r, src4_l); 767cabdff1aSopenharmony_ci ILVRL_B2_UH(src14, src5, src5_r, src5_l); 768cabdff1aSopenharmony_ci ILVRL_B2_UH(src15, src6, src6_r, src6_l); 769cabdff1aSopenharmony_ci ILVRL_B2_UH(src16, src7, src7_r, src7_l); 770cabdff1aSopenharmony_ci ILVRL_B2_UH(src17, src8, src8_r, src8_l); 771cabdff1aSopenharmony_ci 772cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r); 773cabdff1aSopenharmony_ci HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r); 774cabdff1aSopenharmony_ci HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r); 775cabdff1aSopenharmony_ci HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l); 776cabdff1aSopenharmony_ci HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l); 777cabdff1aSopenharmony_ci HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l); 778cabdff1aSopenharmony_ci 779cabdff1aSopenharmony_ci sum0_r = src0_r + src1_r + 1; 780cabdff1aSopenharmony_ci sum1_r = src1_r + src2_r + 1; 781cabdff1aSopenharmony_ci sum2_r = src2_r + src3_r + 1; 782cabdff1aSopenharmony_ci sum3_r = src3_r + src4_r + 1; 783cabdff1aSopenharmony_ci sum4_r = src4_r + src5_r + 1; 784cabdff1aSopenharmony_ci sum5_r = src5_r + src6_r + 1; 785cabdff1aSopenharmony_ci sum6_r = src6_r + src7_r + 1; 786cabdff1aSopenharmony_ci sum7_r = src7_r + src8_r + 1; 787cabdff1aSopenharmony_ci sum0_l = src0_l + src1_l + 1; 788cabdff1aSopenharmony_ci sum1_l = src1_l + src2_l + 1; 789cabdff1aSopenharmony_ci sum2_l = src2_l + src3_l + 1; 790cabdff1aSopenharmony_ci sum3_l = src3_l + src4_l + 1; 791cabdff1aSopenharmony_ci sum4_l = src4_l + src5_l + 1; 792cabdff1aSopenharmony_ci sum5_l = src5_l + src6_l + 1; 793cabdff1aSopenharmony_ci sum6_l = src6_l + src7_l + 1; 794cabdff1aSopenharmony_ci sum7_l = src7_l + src8_l + 1; 795cabdff1aSopenharmony_ci 796cabdff1aSopenharmony_ci SRA_4V(sum0_r, sum1_r, sum2_r, sum3_r, 2); 797cabdff1aSopenharmony_ci SRA_4V(sum4_r, sum5_r, sum6_r, sum7_r, 2); 798cabdff1aSopenharmony_ci SRA_4V(sum0_l, sum1_l, sum2_l, sum3_l, 2); 799cabdff1aSopenharmony_ci SRA_4V(sum4_l, sum5_l, sum6_l, sum7_l, 2); 800cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, 801cabdff1aSopenharmony_ci sum2_l, sum2_r, sum3_l, sum3_r, dst, dst_stride); 802cabdff1aSopenharmony_ci dst += (4 * dst_stride); 803cabdff1aSopenharmony_ci 804cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 805cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 806cabdff1aSopenharmony_ci src9, src10, src11, src12, src13, src14, src15, src16); 807cabdff1aSopenharmony_ci src += (8 * src_stride); 808cabdff1aSopenharmony_ci src8 = LD_UB(src); 809cabdff1aSopenharmony_ci src17 = LD_UB(src + 1); 810cabdff1aSopenharmony_ci 811cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, 812cabdff1aSopenharmony_ci sum6_l, sum6_r, sum7_l, sum7_r, dst, dst_stride); 813cabdff1aSopenharmony_ci dst += (4 * dst_stride); 814cabdff1aSopenharmony_ci 815cabdff1aSopenharmony_ci ILVRL_B2_UH(src9, src0, src0_r, src0_l); 816cabdff1aSopenharmony_ci ILVRL_B2_UH(src10, src1, src1_r, src1_l); 817cabdff1aSopenharmony_ci ILVRL_B2_UH(src11, src2, src2_r, src2_l); 818cabdff1aSopenharmony_ci ILVRL_B2_UH(src12, src3, src3_r, src3_l); 819cabdff1aSopenharmony_ci ILVRL_B2_UH(src13, src4, src4_r, src4_l); 820cabdff1aSopenharmony_ci ILVRL_B2_UH(src14, src5, src5_r, src5_l); 821cabdff1aSopenharmony_ci ILVRL_B2_UH(src15, src6, src6_r, src6_l); 822cabdff1aSopenharmony_ci ILVRL_B2_UH(src16, src7, src7_r, src7_l); 823cabdff1aSopenharmony_ci ILVRL_B2_UH(src17, src8, src8_r, src8_l); 824cabdff1aSopenharmony_ci 825cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r); 826cabdff1aSopenharmony_ci HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r); 827cabdff1aSopenharmony_ci HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r); 828cabdff1aSopenharmony_ci HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l); 829cabdff1aSopenharmony_ci HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l); 830cabdff1aSopenharmony_ci HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l); 831cabdff1aSopenharmony_ci 832cabdff1aSopenharmony_ci sum0_r = src0_r + src1_r + 1; 833cabdff1aSopenharmony_ci sum1_r = src1_r + src2_r + 1; 834cabdff1aSopenharmony_ci sum2_r = src2_r + src3_r + 1; 835cabdff1aSopenharmony_ci sum3_r = src3_r + src4_r + 1; 836cabdff1aSopenharmony_ci sum4_r = src4_r + src5_r + 1; 837cabdff1aSopenharmony_ci sum5_r = src5_r + src6_r + 1; 838cabdff1aSopenharmony_ci sum6_r = src6_r + src7_r + 1; 839cabdff1aSopenharmony_ci sum7_r = src7_r + src8_r + 1; 840cabdff1aSopenharmony_ci sum0_l = src0_l + src1_l + 1; 841cabdff1aSopenharmony_ci sum1_l = src1_l + src2_l + 1; 842cabdff1aSopenharmony_ci sum2_l = src2_l + src3_l + 1; 843cabdff1aSopenharmony_ci sum3_l = src3_l + src4_l + 1; 844cabdff1aSopenharmony_ci sum4_l = src4_l + src5_l + 1; 845cabdff1aSopenharmony_ci sum5_l = src5_l + src6_l + 1; 846cabdff1aSopenharmony_ci sum6_l = src6_l + src7_l + 1; 847cabdff1aSopenharmony_ci sum7_l = src7_l + src8_l + 1; 848cabdff1aSopenharmony_ci 849cabdff1aSopenharmony_ci SRA_4V(sum0_r, sum1_r, sum2_r, sum3_r, 2); 850cabdff1aSopenharmony_ci SRA_4V(sum4_r, sum5_r, sum6_r, sum7_r, 2); 851cabdff1aSopenharmony_ci SRA_4V(sum0_l, sum1_l, sum2_l, sum3_l, 2); 852cabdff1aSopenharmony_ci SRA_4V(sum4_l, sum5_l, sum6_l, sum7_l, 2); 853cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, 854cabdff1aSopenharmony_ci sum2_l, sum2_r, sum3_l, sum3_r, dst, dst_stride); 855cabdff1aSopenharmony_ci dst += (4 * dst_stride); 856cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, 857cabdff1aSopenharmony_ci sum6_l, sum6_r, sum7_l, sum7_r, dst, dst_stride); 858cabdff1aSopenharmony_ci} 859cabdff1aSopenharmony_ci 860cabdff1aSopenharmony_cistatic void common_hv_bil_no_rnd_8x16_msa(const uint8_t *src, 861cabdff1aSopenharmony_ci int32_t src_stride, 862cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride) 863cabdff1aSopenharmony_ci{ 864cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9; 865cabdff1aSopenharmony_ci v16u8 src10, src11, src12, src13, src14, src15, src16, src17; 866cabdff1aSopenharmony_ci v8u16 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r; 867cabdff1aSopenharmony_ci v8u16 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l; 868cabdff1aSopenharmony_ci v8u16 src7_l, src8_l; 869cabdff1aSopenharmony_ci v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r; 870cabdff1aSopenharmony_ci v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l; 871cabdff1aSopenharmony_ci 872cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 873cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 874cabdff1aSopenharmony_ci src9, src10, src11, src12, src13, src14, src15, src16); 875cabdff1aSopenharmony_ci src += (8 * src_stride); 876cabdff1aSopenharmony_ci src8 = LD_UB(src); 877cabdff1aSopenharmony_ci src17 = LD_UB(src + 1); 878cabdff1aSopenharmony_ci 879cabdff1aSopenharmony_ci ILVRL_B2_UH(src9, src0, src0_r, src0_l); 880cabdff1aSopenharmony_ci ILVRL_B2_UH(src10, src1, src1_r, src1_l); 881cabdff1aSopenharmony_ci ILVRL_B2_UH(src11, src2, src2_r, src2_l); 882cabdff1aSopenharmony_ci ILVRL_B2_UH(src12, src3, src3_r, src3_l); 883cabdff1aSopenharmony_ci ILVRL_B2_UH(src13, src4, src4_r, src4_l); 884cabdff1aSopenharmony_ci ILVRL_B2_UH(src14, src5, src5_r, src5_l); 885cabdff1aSopenharmony_ci ILVRL_B2_UH(src15, src6, src6_r, src6_l); 886cabdff1aSopenharmony_ci ILVRL_B2_UH(src16, src7, src7_r, src7_l); 887cabdff1aSopenharmony_ci ILVRL_B2_UH(src17, src8, src8_r, src8_l); 888cabdff1aSopenharmony_ci 889cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r); 890cabdff1aSopenharmony_ci HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r); 891cabdff1aSopenharmony_ci HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r); 892cabdff1aSopenharmony_ci HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l); 893cabdff1aSopenharmony_ci HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l); 894cabdff1aSopenharmony_ci HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l); 895cabdff1aSopenharmony_ci 896cabdff1aSopenharmony_ci sum0_r = src0_r + src1_r + 1; 897cabdff1aSopenharmony_ci sum1_r = src1_r + src2_r + 1; 898cabdff1aSopenharmony_ci sum2_r = src2_r + src3_r + 1; 899cabdff1aSopenharmony_ci sum3_r = src3_r + src4_r + 1; 900cabdff1aSopenharmony_ci sum4_r = src4_r + src5_r + 1; 901cabdff1aSopenharmony_ci sum5_r = src5_r + src6_r + 1; 902cabdff1aSopenharmony_ci sum6_r = src6_r + src7_r + 1; 903cabdff1aSopenharmony_ci sum7_r = src7_r + src8_r + 1; 904cabdff1aSopenharmony_ci sum0_l = src0_l + src1_l + 1; 905cabdff1aSopenharmony_ci sum1_l = src1_l + src2_l + 1; 906cabdff1aSopenharmony_ci sum2_l = src2_l + src3_l + 1; 907cabdff1aSopenharmony_ci sum3_l = src3_l + src4_l + 1; 908cabdff1aSopenharmony_ci sum4_l = src4_l + src5_l + 1; 909cabdff1aSopenharmony_ci sum5_l = src5_l + src6_l + 1; 910cabdff1aSopenharmony_ci sum6_l = src6_l + src7_l + 1; 911cabdff1aSopenharmony_ci sum7_l = src7_l + src8_l + 1; 912cabdff1aSopenharmony_ci 913cabdff1aSopenharmony_ci SRA_4V(sum0_r, sum1_r, sum2_r, sum3_r, 2); 914cabdff1aSopenharmony_ci SRA_4V(sum4_r, sum5_r, sum6_r, sum7_r, 2); 915cabdff1aSopenharmony_ci SRA_4V(sum0_l, sum1_l, sum2_l, sum3_l, 2); 916cabdff1aSopenharmony_ci SRA_4V(sum4_l, sum5_l, sum6_l, sum7_l, 2); 917cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, 918cabdff1aSopenharmony_ci sum2_l, sum2_r, sum3_l, sum3_r, dst, dst_stride); 919cabdff1aSopenharmony_ci dst += (4 * dst_stride); 920cabdff1aSopenharmony_ci PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, 921cabdff1aSopenharmony_ci sum6_l, sum6_r, sum7_l, sum7_r, dst, dst_stride); 922cabdff1aSopenharmony_ci} 923cabdff1aSopenharmony_ci 924cabdff1aSopenharmony_cistatic void common_hv_bil_and_aver_dst_4w_msa(const uint8_t *src, 925cabdff1aSopenharmony_ci int32_t src_stride, 926cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 927cabdff1aSopenharmony_ci uint8_t height) 928cabdff1aSopenharmony_ci{ 929cabdff1aSopenharmony_ci uint8_t loop_cnt; 930cabdff1aSopenharmony_ci uint32_t out0, out1; 931cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src0_sld1, src1_sld1, src2_sld1; 932cabdff1aSopenharmony_ci v16u8 src0_r, src1_r, src2_r; 933cabdff1aSopenharmony_ci v8u16 add0, add1, add2, sum0, sum1; 934cabdff1aSopenharmony_ci v16u8 dst0, dst1, res0, res1; 935cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 936cabdff1aSopenharmony_ci 937cabdff1aSopenharmony_ci src0 = LD_SB(src); 938cabdff1aSopenharmony_ci src += src_stride; 939cabdff1aSopenharmony_ci 940cabdff1aSopenharmony_ci for (loop_cnt = (height >> 1); loop_cnt--;) { 941cabdff1aSopenharmony_ci LD_SB2(src, src_stride, src1, src2); 942cabdff1aSopenharmony_ci src += (2 * src_stride); 943cabdff1aSopenharmony_ci 944cabdff1aSopenharmony_ci LD_UB2(dst, dst_stride, dst0, dst1); 945cabdff1aSopenharmony_ci SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, 946cabdff1aSopenharmony_ci src1_sld1, src2_sld1); 947cabdff1aSopenharmony_ci ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, 948cabdff1aSopenharmony_ci src1_r, src2_r); 949cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); 950cabdff1aSopenharmony_ci ADD2(add0, add1, add1, add2, sum0, sum1); 951cabdff1aSopenharmony_ci SRARI_H2_UH(sum0, sum1, 2); 952cabdff1aSopenharmony_ci PCKEV_B2_UB(sum0, sum0, sum1, sum1, res0, res1); 953cabdff1aSopenharmony_ci AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1); 954cabdff1aSopenharmony_ci 955cabdff1aSopenharmony_ci out0 = __msa_copy_u_w((v4i32) res0, 0); 956cabdff1aSopenharmony_ci out1 = __msa_copy_u_w((v4i32) res1, 0); 957cabdff1aSopenharmony_ci SW(out0, dst); 958cabdff1aSopenharmony_ci dst += dst_stride; 959cabdff1aSopenharmony_ci SW(out1, dst); 960cabdff1aSopenharmony_ci dst += dst_stride; 961cabdff1aSopenharmony_ci 962cabdff1aSopenharmony_ci src0 = src2; 963cabdff1aSopenharmony_ci } 964cabdff1aSopenharmony_ci} 965cabdff1aSopenharmony_ci 966cabdff1aSopenharmony_cistatic void common_hv_bil_and_aver_dst_8w_msa(const uint8_t *src, 967cabdff1aSopenharmony_ci int32_t src_stride, 968cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 969cabdff1aSopenharmony_ci uint8_t height) 970cabdff1aSopenharmony_ci{ 971cabdff1aSopenharmony_ci uint8_t loop_cnt; 972cabdff1aSopenharmony_ci v16i8 src0, src1, src2, src3, src4; 973cabdff1aSopenharmony_ci v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1, src4_sld1; 974cabdff1aSopenharmony_ci v16u8 dst0, dst1, dst2, dst3; 975cabdff1aSopenharmony_ci v16u8 src0_r, src1_r, src2_r, src3_r, src4_r; 976cabdff1aSopenharmony_ci v8u16 add0, add1, add2, add3, add4; 977cabdff1aSopenharmony_ci v8u16 sum0, sum1, sum2, sum3; 978cabdff1aSopenharmony_ci v16i8 zeros = { 0 }; 979cabdff1aSopenharmony_ci 980cabdff1aSopenharmony_ci src0 = LD_SB(src); 981cabdff1aSopenharmony_ci src += src_stride; 982cabdff1aSopenharmony_ci 983cabdff1aSopenharmony_ci for (loop_cnt = (height >> 2); loop_cnt--;) { 984cabdff1aSopenharmony_ci LD_SB4(src, src_stride, src1, src2, src3, src4); 985cabdff1aSopenharmony_ci src += (4 * src_stride); 986cabdff1aSopenharmony_ci 987cabdff1aSopenharmony_ci LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 988cabdff1aSopenharmony_ci SLDI_B3_SB(zeros, src0, zeros, src1, zeros, src2, 1, src0_sld1, 989cabdff1aSopenharmony_ci src1_sld1, src2_sld1); 990cabdff1aSopenharmony_ci SLDI_B2_SB(zeros, src3, zeros, src4, 1, src3_sld1, src4_sld1); 991cabdff1aSopenharmony_ci ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, 992cabdff1aSopenharmony_ci src1_r, src2_r); 993cabdff1aSopenharmony_ci ILVR_B2_UB(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r); 994cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); 995cabdff1aSopenharmony_ci HADD_UB2_UH(src3_r, src4_r, add3, add4); 996cabdff1aSopenharmony_ci ADD4(add0, add1, add1, add2, add2, add3, add3, add4, 997cabdff1aSopenharmony_ci sum0, sum1, sum2, sum3); 998cabdff1aSopenharmony_ci SRARI_H4_UH(sum0, sum1, sum2, sum3, 2); 999cabdff1aSopenharmony_ci PCKEV_AVG_ST8x4_UB(sum0, dst0, sum1, dst1, 1000cabdff1aSopenharmony_ci sum2, dst2, sum3, dst3, dst, dst_stride); 1001cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1002cabdff1aSopenharmony_ci src0 = src4; 1003cabdff1aSopenharmony_ci } 1004cabdff1aSopenharmony_ci} 1005cabdff1aSopenharmony_ci 1006cabdff1aSopenharmony_cistatic void common_hv_bil_and_aver_dst_16w_msa(const uint8_t *src, 1007cabdff1aSopenharmony_ci int32_t src_stride, 1008cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 1009cabdff1aSopenharmony_ci uint8_t height) 1010cabdff1aSopenharmony_ci{ 1011cabdff1aSopenharmony_ci uint8_t loop_cnt; 1012cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; 1013cabdff1aSopenharmony_ci v16u8 src11, src12, src13, src14, src15, src16, src17; 1014cabdff1aSopenharmony_ci v16u8 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r; 1015cabdff1aSopenharmony_ci v16u8 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l; 1016cabdff1aSopenharmony_ci v16u8 src7_l, src8_l; 1017cabdff1aSopenharmony_ci v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; 1018cabdff1aSopenharmony_ci v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r; 1019cabdff1aSopenharmony_ci v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l; 1020cabdff1aSopenharmony_ci v8u16 add0, add1, add2, add3, add4, add5, add6, add7, add8; 1021cabdff1aSopenharmony_ci 1022cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 1023cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 1024cabdff1aSopenharmony_ci LD_UB8((src + 1), src_stride, 1025cabdff1aSopenharmony_ci src9, src10, src11, src12, src13, src14, src15, src16); 1026cabdff1aSopenharmony_ci src += (8 * src_stride); 1027cabdff1aSopenharmony_ci 1028cabdff1aSopenharmony_ci src8 = LD_UB(src); 1029cabdff1aSopenharmony_ci src17 = LD_UB(src + 1); 1030cabdff1aSopenharmony_ci 1031cabdff1aSopenharmony_ci ILVRL_B2_UB(src9, src0, src0_r, src0_l); 1032cabdff1aSopenharmony_ci ILVRL_B2_UB(src10, src1, src1_r, src1_l); 1033cabdff1aSopenharmony_ci ILVRL_B2_UB(src11, src2, src2_r, src2_l); 1034cabdff1aSopenharmony_ci ILVRL_B2_UB(src12, src3, src3_r, src3_l); 1035cabdff1aSopenharmony_ci ILVRL_B2_UB(src13, src4, src4_r, src4_l); 1036cabdff1aSopenharmony_ci ILVRL_B2_UB(src14, src5, src5_r, src5_l); 1037cabdff1aSopenharmony_ci ILVRL_B2_UB(src15, src6, src6_r, src6_l); 1038cabdff1aSopenharmony_ci ILVRL_B2_UB(src16, src7, src7_r, src7_l); 1039cabdff1aSopenharmony_ci ILVRL_B2_UB(src17, src8, src8_r, src8_l); 1040cabdff1aSopenharmony_ci HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); 1041cabdff1aSopenharmony_ci HADD_UB3_UH(src3_r, src4_r, src5_r, add3, add4, add5); 1042cabdff1aSopenharmony_ci HADD_UB3_UH(src6_r, src7_r, src8_r, add6, add7, add8); 1043cabdff1aSopenharmony_ci ADD4(add0, add1, add1, add2, add2, add3, add3, add4, sum0_r, sum1_r, 1044cabdff1aSopenharmony_ci sum2_r, sum3_r); 1045cabdff1aSopenharmony_ci ADD4(add4, add5, add5, add6, add6, add7, add7, add8, sum4_r, sum5_r, 1046cabdff1aSopenharmony_ci sum6_r, sum7_r); 1047cabdff1aSopenharmony_ci HADD_UB3_UH(src0_l, src1_l, src2_l, add0, add1, add2); 1048cabdff1aSopenharmony_ci HADD_UB3_UH(src3_l, src4_l, src5_l, add3, add4, add5); 1049cabdff1aSopenharmony_ci HADD_UB3_UH(src6_l, src7_l, src8_l, add6, add7, add8); 1050cabdff1aSopenharmony_ci ADD4(add0, add1, add1, add2, add2, add3, add3, add4, sum0_l, sum1_l, 1051cabdff1aSopenharmony_ci sum2_l, sum3_l); 1052cabdff1aSopenharmony_ci ADD4(add4, add5, add5, add6, add6, add7, add7, add8, sum4_l, sum5_l, 1053cabdff1aSopenharmony_ci sum6_l, sum7_l); 1054cabdff1aSopenharmony_ci SRARI_H4_UH(sum0_r, sum1_r, sum2_r, sum3_r, 2); 1055cabdff1aSopenharmony_ci SRARI_H4_UH(sum4_r, sum5_r, sum6_r, sum7_r, 2); 1056cabdff1aSopenharmony_ci SRARI_H4_UH(sum0_l, sum1_l, sum2_l, sum3_l, 2); 1057cabdff1aSopenharmony_ci SRARI_H4_UH(sum4_l, sum5_l, sum6_l, sum7_l, 2); 1058cabdff1aSopenharmony_ci LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); 1059cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum0_l, sum0_r, dst0, dst); 1060cabdff1aSopenharmony_ci dst += dst_stride; 1061cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum1_l, sum1_r, dst1, dst); 1062cabdff1aSopenharmony_ci dst += dst_stride; 1063cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum2_l, sum2_r, dst2, dst); 1064cabdff1aSopenharmony_ci dst += dst_stride; 1065cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum3_l, sum3_r, dst3, dst); 1066cabdff1aSopenharmony_ci dst += dst_stride; 1067cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum4_l, sum4_r, dst4, dst); 1068cabdff1aSopenharmony_ci dst += dst_stride; 1069cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum5_l, sum5_r, dst5, dst); 1070cabdff1aSopenharmony_ci dst += dst_stride; 1071cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum6_l, sum6_r, dst6, dst); 1072cabdff1aSopenharmony_ci dst += dst_stride; 1073cabdff1aSopenharmony_ci PCKEV_AVG_ST_UB(sum7_l, sum7_r, dst7, dst); 1074cabdff1aSopenharmony_ci dst += dst_stride; 1075cabdff1aSopenharmony_ci } 1076cabdff1aSopenharmony_ci} 1077cabdff1aSopenharmony_ci 1078cabdff1aSopenharmony_cistatic void copy_width8_msa(const uint8_t *src, int32_t src_stride, 1079cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 1080cabdff1aSopenharmony_ci int32_t height) 1081cabdff1aSopenharmony_ci{ 1082cabdff1aSopenharmony_ci int32_t cnt; 1083cabdff1aSopenharmony_ci uint64_t out0, out1, out2, out3, out4, out5, out6, out7; 1084cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 1085cabdff1aSopenharmony_ci 1086cabdff1aSopenharmony_ci if (0 == height % 12) { 1087cabdff1aSopenharmony_ci for (cnt = (height / 12); cnt--;) { 1088cabdff1aSopenharmony_ci LD_UB8(src, src_stride, 1089cabdff1aSopenharmony_ci src0, src1, src2, src3, src4, src5, src6, src7); 1090cabdff1aSopenharmony_ci src += (8 * src_stride); 1091cabdff1aSopenharmony_ci 1092cabdff1aSopenharmony_ci out0 = __msa_copy_u_d((v2i64) src0, 0); 1093cabdff1aSopenharmony_ci out1 = __msa_copy_u_d((v2i64) src1, 0); 1094cabdff1aSopenharmony_ci out2 = __msa_copy_u_d((v2i64) src2, 0); 1095cabdff1aSopenharmony_ci out3 = __msa_copy_u_d((v2i64) src3, 0); 1096cabdff1aSopenharmony_ci out4 = __msa_copy_u_d((v2i64) src4, 0); 1097cabdff1aSopenharmony_ci out5 = __msa_copy_u_d((v2i64) src5, 0); 1098cabdff1aSopenharmony_ci out6 = __msa_copy_u_d((v2i64) src6, 0); 1099cabdff1aSopenharmony_ci out7 = __msa_copy_u_d((v2i64) src7, 0); 1100cabdff1aSopenharmony_ci 1101cabdff1aSopenharmony_ci SD4(out0, out1, out2, out3, dst, dst_stride); 1102cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1103cabdff1aSopenharmony_ci SD4(out4, out5, out6, out7, dst, dst_stride); 1104cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1105cabdff1aSopenharmony_ci 1106cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 1107cabdff1aSopenharmony_ci src += (4 * src_stride); 1108cabdff1aSopenharmony_ci 1109cabdff1aSopenharmony_ci out0 = __msa_copy_u_d((v2i64) src0, 0); 1110cabdff1aSopenharmony_ci out1 = __msa_copy_u_d((v2i64) src1, 0); 1111cabdff1aSopenharmony_ci out2 = __msa_copy_u_d((v2i64) src2, 0); 1112cabdff1aSopenharmony_ci out3 = __msa_copy_u_d((v2i64) src3, 0); 1113cabdff1aSopenharmony_ci 1114cabdff1aSopenharmony_ci SD4(out0, out1, out2, out3, dst, dst_stride); 1115cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1116cabdff1aSopenharmony_ci } 1117cabdff1aSopenharmony_ci } else if (0 == height % 8) { 1118cabdff1aSopenharmony_ci for (cnt = height >> 3; cnt--;) { 1119cabdff1aSopenharmony_ci LD_UB8(src, src_stride, 1120cabdff1aSopenharmony_ci src0, src1, src2, src3, src4, src5, src6, src7); 1121cabdff1aSopenharmony_ci src += (8 * src_stride); 1122cabdff1aSopenharmony_ci 1123cabdff1aSopenharmony_ci out0 = __msa_copy_u_d((v2i64) src0, 0); 1124cabdff1aSopenharmony_ci out1 = __msa_copy_u_d((v2i64) src1, 0); 1125cabdff1aSopenharmony_ci out2 = __msa_copy_u_d((v2i64) src2, 0); 1126cabdff1aSopenharmony_ci out3 = __msa_copy_u_d((v2i64) src3, 0); 1127cabdff1aSopenharmony_ci out4 = __msa_copy_u_d((v2i64) src4, 0); 1128cabdff1aSopenharmony_ci out5 = __msa_copy_u_d((v2i64) src5, 0); 1129cabdff1aSopenharmony_ci out6 = __msa_copy_u_d((v2i64) src6, 0); 1130cabdff1aSopenharmony_ci out7 = __msa_copy_u_d((v2i64) src7, 0); 1131cabdff1aSopenharmony_ci 1132cabdff1aSopenharmony_ci SD4(out0, out1, out2, out3, dst, dst_stride); 1133cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1134cabdff1aSopenharmony_ci SD4(out4, out5, out6, out7, dst, dst_stride); 1135cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1136cabdff1aSopenharmony_ci } 1137cabdff1aSopenharmony_ci } else if (0 == height % 4) { 1138cabdff1aSopenharmony_ci for (cnt = (height / 4); cnt--;) { 1139cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 1140cabdff1aSopenharmony_ci src += (4 * src_stride); 1141cabdff1aSopenharmony_ci out0 = __msa_copy_u_d((v2i64) src0, 0); 1142cabdff1aSopenharmony_ci out1 = __msa_copy_u_d((v2i64) src1, 0); 1143cabdff1aSopenharmony_ci out2 = __msa_copy_u_d((v2i64) src2, 0); 1144cabdff1aSopenharmony_ci out3 = __msa_copy_u_d((v2i64) src3, 0); 1145cabdff1aSopenharmony_ci 1146cabdff1aSopenharmony_ci SD4(out0, out1, out2, out3, dst, dst_stride); 1147cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1148cabdff1aSopenharmony_ci } 1149cabdff1aSopenharmony_ci } else if (0 == height % 2) { 1150cabdff1aSopenharmony_ci for (cnt = (height / 2); cnt--;) { 1151cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src0, src1); 1152cabdff1aSopenharmony_ci src += (2 * src_stride); 1153cabdff1aSopenharmony_ci out0 = __msa_copy_u_d((v2i64) src0, 0); 1154cabdff1aSopenharmony_ci out1 = __msa_copy_u_d((v2i64) src1, 0); 1155cabdff1aSopenharmony_ci 1156cabdff1aSopenharmony_ci SD(out0, dst); 1157cabdff1aSopenharmony_ci dst += dst_stride; 1158cabdff1aSopenharmony_ci SD(out1, dst); 1159cabdff1aSopenharmony_ci dst += dst_stride; 1160cabdff1aSopenharmony_ci } 1161cabdff1aSopenharmony_ci } 1162cabdff1aSopenharmony_ci} 1163cabdff1aSopenharmony_ci 1164cabdff1aSopenharmony_cistatic void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, 1165cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 1166cabdff1aSopenharmony_ci int32_t height, int32_t width) 1167cabdff1aSopenharmony_ci{ 1168cabdff1aSopenharmony_ci int32_t cnt, loop_cnt; 1169cabdff1aSopenharmony_ci const uint8_t *src_tmp; 1170cabdff1aSopenharmony_ci uint8_t *dst_tmp; 1171cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 1172cabdff1aSopenharmony_ci 1173cabdff1aSopenharmony_ci for (cnt = (width >> 4); cnt--;) { 1174cabdff1aSopenharmony_ci src_tmp = src; 1175cabdff1aSopenharmony_ci dst_tmp = dst; 1176cabdff1aSopenharmony_ci 1177cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 1178cabdff1aSopenharmony_ci LD_UB8(src_tmp, src_stride, 1179cabdff1aSopenharmony_ci src0, src1, src2, src3, src4, src5, src6, src7); 1180cabdff1aSopenharmony_ci src_tmp += (8 * src_stride); 1181cabdff1aSopenharmony_ci 1182cabdff1aSopenharmony_ci ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, 1183cabdff1aSopenharmony_ci dst_tmp, dst_stride); 1184cabdff1aSopenharmony_ci dst_tmp += (8 * dst_stride); 1185cabdff1aSopenharmony_ci } 1186cabdff1aSopenharmony_ci 1187cabdff1aSopenharmony_ci src += 16; 1188cabdff1aSopenharmony_ci dst += 16; 1189cabdff1aSopenharmony_ci } 1190cabdff1aSopenharmony_ci} 1191cabdff1aSopenharmony_ci 1192cabdff1aSopenharmony_cistatic void copy_width16_msa(const uint8_t *src, int32_t src_stride, 1193cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 1194cabdff1aSopenharmony_ci int32_t height) 1195cabdff1aSopenharmony_ci{ 1196cabdff1aSopenharmony_ci int32_t cnt; 1197cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 1198cabdff1aSopenharmony_ci 1199cabdff1aSopenharmony_ci if (0 == height % 12) { 1200cabdff1aSopenharmony_ci for (cnt = (height / 12); cnt--;) { 1201cabdff1aSopenharmony_ci LD_UB8(src, src_stride, 1202cabdff1aSopenharmony_ci src0, src1, src2, src3, src4, src5, src6, src7); 1203cabdff1aSopenharmony_ci src += (8 * src_stride); 1204cabdff1aSopenharmony_ci ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, 1205cabdff1aSopenharmony_ci dst, dst_stride); 1206cabdff1aSopenharmony_ci dst += (8 * dst_stride); 1207cabdff1aSopenharmony_ci 1208cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 1209cabdff1aSopenharmony_ci src += (4 * src_stride); 1210cabdff1aSopenharmony_ci ST_UB4(src0, src1, src2, src3, dst, dst_stride); 1211cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1212cabdff1aSopenharmony_ci } 1213cabdff1aSopenharmony_ci } else if (0 == height % 8) { 1214cabdff1aSopenharmony_ci copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); 1215cabdff1aSopenharmony_ci } else if (0 == height % 4) { 1216cabdff1aSopenharmony_ci for (cnt = (height >> 2); cnt--;) { 1217cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 1218cabdff1aSopenharmony_ci src += (4 * src_stride); 1219cabdff1aSopenharmony_ci 1220cabdff1aSopenharmony_ci ST_UB4(src0, src1, src2, src3, dst, dst_stride); 1221cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1222cabdff1aSopenharmony_ci } 1223cabdff1aSopenharmony_ci } 1224cabdff1aSopenharmony_ci} 1225cabdff1aSopenharmony_ci 1226cabdff1aSopenharmony_cistatic void avg_width4_msa(const uint8_t *src, int32_t src_stride, 1227cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 1228cabdff1aSopenharmony_ci int32_t height) 1229cabdff1aSopenharmony_ci{ 1230cabdff1aSopenharmony_ci int32_t cnt; 1231cabdff1aSopenharmony_ci uint32_t out0, out1, out2, out3; 1232cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3; 1233cabdff1aSopenharmony_ci v16u8 dst0, dst1, dst2, dst3; 1234cabdff1aSopenharmony_ci 1235cabdff1aSopenharmony_ci if (0 == (height % 4)) { 1236cabdff1aSopenharmony_ci for (cnt = (height / 4); cnt--;) { 1237cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 1238cabdff1aSopenharmony_ci src += (4 * src_stride); 1239cabdff1aSopenharmony_ci 1240cabdff1aSopenharmony_ci LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 1241cabdff1aSopenharmony_ci 1242cabdff1aSopenharmony_ci AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, 1243cabdff1aSopenharmony_ci dst0, dst1, dst2, dst3); 1244cabdff1aSopenharmony_ci 1245cabdff1aSopenharmony_ci out0 = __msa_copy_u_w((v4i32) dst0, 0); 1246cabdff1aSopenharmony_ci out1 = __msa_copy_u_w((v4i32) dst1, 0); 1247cabdff1aSopenharmony_ci out2 = __msa_copy_u_w((v4i32) dst2, 0); 1248cabdff1aSopenharmony_ci out3 = __msa_copy_u_w((v4i32) dst3, 0); 1249cabdff1aSopenharmony_ci SW4(out0, out1, out2, out3, dst, dst_stride); 1250cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1251cabdff1aSopenharmony_ci } 1252cabdff1aSopenharmony_ci } else if (0 == (height % 2)) { 1253cabdff1aSopenharmony_ci for (cnt = (height / 2); cnt--;) { 1254cabdff1aSopenharmony_ci LD_UB2(src, src_stride, src0, src1); 1255cabdff1aSopenharmony_ci src += (2 * src_stride); 1256cabdff1aSopenharmony_ci 1257cabdff1aSopenharmony_ci LD_UB2(dst, dst_stride, dst0, dst1); 1258cabdff1aSopenharmony_ci 1259cabdff1aSopenharmony_ci AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1); 1260cabdff1aSopenharmony_ci 1261cabdff1aSopenharmony_ci out0 = __msa_copy_u_w((v4i32) dst0, 0); 1262cabdff1aSopenharmony_ci out1 = __msa_copy_u_w((v4i32) dst1, 0); 1263cabdff1aSopenharmony_ci SW(out0, dst); 1264cabdff1aSopenharmony_ci dst += dst_stride; 1265cabdff1aSopenharmony_ci SW(out1, dst); 1266cabdff1aSopenharmony_ci dst += dst_stride; 1267cabdff1aSopenharmony_ci } 1268cabdff1aSopenharmony_ci } 1269cabdff1aSopenharmony_ci} 1270cabdff1aSopenharmony_ci 1271cabdff1aSopenharmony_cistatic void avg_width8_msa(const uint8_t *src, int32_t src_stride, 1272cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 1273cabdff1aSopenharmony_ci int32_t height) 1274cabdff1aSopenharmony_ci{ 1275cabdff1aSopenharmony_ci int32_t cnt; 1276cabdff1aSopenharmony_ci uint64_t out0, out1, out2, out3; 1277cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3; 1278cabdff1aSopenharmony_ci v16u8 dst0, dst1, dst2, dst3; 1279cabdff1aSopenharmony_ci 1280cabdff1aSopenharmony_ci for (cnt = (height / 4); cnt--;) { 1281cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 1282cabdff1aSopenharmony_ci src += (4 * src_stride); 1283cabdff1aSopenharmony_ci LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 1284cabdff1aSopenharmony_ci 1285cabdff1aSopenharmony_ci AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, 1286cabdff1aSopenharmony_ci dst0, dst1, dst2, dst3); 1287cabdff1aSopenharmony_ci 1288cabdff1aSopenharmony_ci out0 = __msa_copy_u_d((v2i64) dst0, 0); 1289cabdff1aSopenharmony_ci out1 = __msa_copy_u_d((v2i64) dst1, 0); 1290cabdff1aSopenharmony_ci out2 = __msa_copy_u_d((v2i64) dst2, 0); 1291cabdff1aSopenharmony_ci out3 = __msa_copy_u_d((v2i64) dst3, 0); 1292cabdff1aSopenharmony_ci SD4(out0, out1, out2, out3, dst, dst_stride); 1293cabdff1aSopenharmony_ci dst += (4 * dst_stride); 1294cabdff1aSopenharmony_ci } 1295cabdff1aSopenharmony_ci} 1296cabdff1aSopenharmony_ci 1297cabdff1aSopenharmony_cistatic void avg_width16_msa(const uint8_t *src, int32_t src_stride, 1298cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 1299cabdff1aSopenharmony_ci int32_t height) 1300cabdff1aSopenharmony_ci{ 1301cabdff1aSopenharmony_ci int32_t cnt; 1302cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 1303cabdff1aSopenharmony_ci v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; 1304cabdff1aSopenharmony_ci 1305cabdff1aSopenharmony_ci for (cnt = (height / 8); cnt--;) { 1306cabdff1aSopenharmony_ci LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 1307cabdff1aSopenharmony_ci src += (8 * src_stride); 1308cabdff1aSopenharmony_ci LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); 1309cabdff1aSopenharmony_ci 1310cabdff1aSopenharmony_ci AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, 1311cabdff1aSopenharmony_ci dst0, dst1, dst2, dst3); 1312cabdff1aSopenharmony_ci AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, 1313cabdff1aSopenharmony_ci dst4, dst5, dst6, dst7); 1314cabdff1aSopenharmony_ci ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride); 1315cabdff1aSopenharmony_ci dst += (8 * dst_stride); 1316cabdff1aSopenharmony_ci } 1317cabdff1aSopenharmony_ci} 1318cabdff1aSopenharmony_ci 1319cabdff1aSopenharmony_civoid ff_put_pixels16_msa(uint8_t *block, const uint8_t *pixels, 1320cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1321cabdff1aSopenharmony_ci{ 1322cabdff1aSopenharmony_ci copy_width16_msa(pixels, line_size, block, line_size, h); 1323cabdff1aSopenharmony_ci} 1324cabdff1aSopenharmony_ci 1325cabdff1aSopenharmony_civoid ff_put_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels, 1326cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1327cabdff1aSopenharmony_ci{ 1328cabdff1aSopenharmony_ci common_hz_bil_16w_msa(pixels, line_size, block, line_size, h); 1329cabdff1aSopenharmony_ci} 1330cabdff1aSopenharmony_ci 1331cabdff1aSopenharmony_civoid ff_put_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels, 1332cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1333cabdff1aSopenharmony_ci{ 1334cabdff1aSopenharmony_ci common_vt_bil_16w_msa(pixels, line_size, block, line_size, h); 1335cabdff1aSopenharmony_ci} 1336cabdff1aSopenharmony_ci 1337cabdff1aSopenharmony_civoid ff_put_pixels16_xy2_msa(uint8_t *block, const uint8_t *pixels, 1338cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1339cabdff1aSopenharmony_ci{ 1340cabdff1aSopenharmony_ci common_hv_bil_16w_msa(pixels, line_size, block, line_size, h); 1341cabdff1aSopenharmony_ci} 1342cabdff1aSopenharmony_ci 1343cabdff1aSopenharmony_civoid ff_put_pixels8_msa(uint8_t *block, const uint8_t *pixels, 1344cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1345cabdff1aSopenharmony_ci{ 1346cabdff1aSopenharmony_ci copy_width8_msa(pixels, line_size, block, line_size, h); 1347cabdff1aSopenharmony_ci} 1348cabdff1aSopenharmony_ci 1349cabdff1aSopenharmony_civoid ff_put_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels, 1350cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1351cabdff1aSopenharmony_ci{ 1352cabdff1aSopenharmony_ci common_hz_bil_8w_msa(pixels, line_size, block, line_size, h); 1353cabdff1aSopenharmony_ci} 1354cabdff1aSopenharmony_ci 1355cabdff1aSopenharmony_civoid ff_put_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels, 1356cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1357cabdff1aSopenharmony_ci{ 1358cabdff1aSopenharmony_ci common_vt_bil_8w_msa(pixels, line_size, block, line_size, h); 1359cabdff1aSopenharmony_ci} 1360cabdff1aSopenharmony_ci 1361cabdff1aSopenharmony_civoid ff_put_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels, 1362cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1363cabdff1aSopenharmony_ci{ 1364cabdff1aSopenharmony_ci common_hv_bil_8w_msa(pixels, line_size, block, line_size, h); 1365cabdff1aSopenharmony_ci} 1366cabdff1aSopenharmony_ci 1367cabdff1aSopenharmony_civoid ff_put_pixels4_x2_msa(uint8_t *block, const uint8_t *pixels, 1368cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1369cabdff1aSopenharmony_ci{ 1370cabdff1aSopenharmony_ci common_hz_bil_4w_msa(pixels, line_size, block, line_size, h); 1371cabdff1aSopenharmony_ci} 1372cabdff1aSopenharmony_ci 1373cabdff1aSopenharmony_civoid ff_put_pixels4_y2_msa(uint8_t *block, const uint8_t *pixels, 1374cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1375cabdff1aSopenharmony_ci{ 1376cabdff1aSopenharmony_ci common_vt_bil_4w_msa(pixels, line_size, block, line_size, h); 1377cabdff1aSopenharmony_ci} 1378cabdff1aSopenharmony_ci 1379cabdff1aSopenharmony_civoid ff_put_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels, 1380cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1381cabdff1aSopenharmony_ci{ 1382cabdff1aSopenharmony_ci common_hv_bil_4w_msa(pixels, line_size, block, line_size, h); 1383cabdff1aSopenharmony_ci} 1384cabdff1aSopenharmony_ci 1385cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels, 1386cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1387cabdff1aSopenharmony_ci{ 1388cabdff1aSopenharmony_ci if (h == 16) { 1389cabdff1aSopenharmony_ci common_hz_bil_no_rnd_16x16_msa(pixels, line_size, block, line_size); 1390cabdff1aSopenharmony_ci } else if (h == 8) { 1391cabdff1aSopenharmony_ci common_hz_bil_no_rnd_8x16_msa(pixels, line_size, block, line_size); 1392cabdff1aSopenharmony_ci } 1393cabdff1aSopenharmony_ci} 1394cabdff1aSopenharmony_ci 1395cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels, 1396cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1397cabdff1aSopenharmony_ci{ 1398cabdff1aSopenharmony_ci if (h == 16) { 1399cabdff1aSopenharmony_ci common_vt_bil_no_rnd_16x16_msa(pixels, line_size, block, line_size); 1400cabdff1aSopenharmony_ci } else if (h == 8) { 1401cabdff1aSopenharmony_ci common_vt_bil_no_rnd_8x16_msa(pixels, line_size, block, line_size); 1402cabdff1aSopenharmony_ci } 1403cabdff1aSopenharmony_ci} 1404cabdff1aSopenharmony_ci 1405cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels16_xy2_msa(uint8_t *block, 1406cabdff1aSopenharmony_ci const uint8_t *pixels, 1407cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1408cabdff1aSopenharmony_ci{ 1409cabdff1aSopenharmony_ci if (h == 16) { 1410cabdff1aSopenharmony_ci common_hv_bil_no_rnd_16x16_msa(pixels, line_size, block, line_size); 1411cabdff1aSopenharmony_ci } else if (h == 8) { 1412cabdff1aSopenharmony_ci common_hv_bil_no_rnd_8x16_msa(pixels, line_size, block, line_size); 1413cabdff1aSopenharmony_ci } 1414cabdff1aSopenharmony_ci} 1415cabdff1aSopenharmony_ci 1416cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels, 1417cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1418cabdff1aSopenharmony_ci{ 1419cabdff1aSopenharmony_ci if (h == 8) { 1420cabdff1aSopenharmony_ci common_hz_bil_no_rnd_8x8_msa(pixels, line_size, block, line_size); 1421cabdff1aSopenharmony_ci } else if (h == 4) { 1422cabdff1aSopenharmony_ci common_hz_bil_no_rnd_4x8_msa(pixels, line_size, block, line_size); 1423cabdff1aSopenharmony_ci } 1424cabdff1aSopenharmony_ci} 1425cabdff1aSopenharmony_ci 1426cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels, 1427cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1428cabdff1aSopenharmony_ci{ 1429cabdff1aSopenharmony_ci if (h == 8) { 1430cabdff1aSopenharmony_ci common_vt_bil_no_rnd_8x8_msa(pixels, line_size, block, line_size); 1431cabdff1aSopenharmony_ci } else if (h == 4) { 1432cabdff1aSopenharmony_ci common_vt_bil_no_rnd_4x8_msa(pixels, line_size, block, line_size); 1433cabdff1aSopenharmony_ci } 1434cabdff1aSopenharmony_ci} 1435cabdff1aSopenharmony_ci 1436cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels, 1437cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1438cabdff1aSopenharmony_ci{ 1439cabdff1aSopenharmony_ci if (h == 8) { 1440cabdff1aSopenharmony_ci common_hv_bil_no_rnd_8x8_msa(pixels, line_size, block, line_size); 1441cabdff1aSopenharmony_ci } else if (h == 4) { 1442cabdff1aSopenharmony_ci common_hv_bil_no_rnd_4x8_msa(pixels, line_size, block, line_size); 1443cabdff1aSopenharmony_ci } 1444cabdff1aSopenharmony_ci} 1445cabdff1aSopenharmony_ci 1446cabdff1aSopenharmony_civoid ff_avg_pixels16_msa(uint8_t *block, const uint8_t *pixels, 1447cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1448cabdff1aSopenharmony_ci{ 1449cabdff1aSopenharmony_ci avg_width16_msa(pixels, line_size, block, line_size, h); 1450cabdff1aSopenharmony_ci} 1451cabdff1aSopenharmony_ci 1452cabdff1aSopenharmony_civoid ff_avg_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels, 1453cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1454cabdff1aSopenharmony_ci{ 1455cabdff1aSopenharmony_ci common_hz_bil_and_aver_dst_16w_msa(pixels, line_size, block, line_size, h); 1456cabdff1aSopenharmony_ci} 1457cabdff1aSopenharmony_ci 1458cabdff1aSopenharmony_civoid ff_avg_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels, 1459cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1460cabdff1aSopenharmony_ci{ 1461cabdff1aSopenharmony_ci common_vt_bil_and_aver_dst_16w_msa(pixels, line_size, block, line_size, h); 1462cabdff1aSopenharmony_ci} 1463cabdff1aSopenharmony_ci 1464cabdff1aSopenharmony_civoid ff_avg_pixels16_xy2_msa(uint8_t *block, const uint8_t *pixels, 1465cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1466cabdff1aSopenharmony_ci{ 1467cabdff1aSopenharmony_ci common_hv_bil_and_aver_dst_16w_msa(pixels, line_size, block, line_size, h); 1468cabdff1aSopenharmony_ci} 1469cabdff1aSopenharmony_ci 1470cabdff1aSopenharmony_civoid ff_avg_pixels8_msa(uint8_t *block, const uint8_t *pixels, 1471cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1472cabdff1aSopenharmony_ci{ 1473cabdff1aSopenharmony_ci avg_width8_msa(pixels, line_size, block, line_size, h); 1474cabdff1aSopenharmony_ci} 1475cabdff1aSopenharmony_ci 1476cabdff1aSopenharmony_civoid ff_avg_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels, 1477cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1478cabdff1aSopenharmony_ci{ 1479cabdff1aSopenharmony_ci common_hz_bil_and_aver_dst_8w_msa(pixels, line_size, block, line_size, h); 1480cabdff1aSopenharmony_ci} 1481cabdff1aSopenharmony_ci 1482cabdff1aSopenharmony_civoid ff_avg_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels, 1483cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1484cabdff1aSopenharmony_ci{ 1485cabdff1aSopenharmony_ci common_vt_bil_and_aver_dst_8w_msa(pixels, line_size, block, line_size, h); 1486cabdff1aSopenharmony_ci} 1487cabdff1aSopenharmony_ci 1488cabdff1aSopenharmony_civoid ff_avg_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels, 1489cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1490cabdff1aSopenharmony_ci{ 1491cabdff1aSopenharmony_ci common_hv_bil_and_aver_dst_8w_msa(pixels, line_size, block, line_size, h); 1492cabdff1aSopenharmony_ci} 1493cabdff1aSopenharmony_ci 1494cabdff1aSopenharmony_civoid ff_avg_pixels4_msa(uint8_t *block, const uint8_t *pixels, 1495cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1496cabdff1aSopenharmony_ci{ 1497cabdff1aSopenharmony_ci avg_width4_msa(pixels, line_size, block, line_size, h); 1498cabdff1aSopenharmony_ci} 1499cabdff1aSopenharmony_ci 1500cabdff1aSopenharmony_civoid ff_avg_pixels4_x2_msa(uint8_t *block, const uint8_t *pixels, 1501cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1502cabdff1aSopenharmony_ci{ 1503cabdff1aSopenharmony_ci common_hz_bil_and_aver_dst_4w_msa(pixels, line_size, block, line_size, h); 1504cabdff1aSopenharmony_ci} 1505cabdff1aSopenharmony_ci 1506cabdff1aSopenharmony_civoid ff_avg_pixels4_y2_msa(uint8_t *block, const uint8_t *pixels, 1507cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1508cabdff1aSopenharmony_ci{ 1509cabdff1aSopenharmony_ci common_vt_bil_and_aver_dst_4w_msa(pixels, line_size, block, line_size, h); 1510cabdff1aSopenharmony_ci} 1511cabdff1aSopenharmony_ci 1512cabdff1aSopenharmony_civoid ff_avg_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels, 1513cabdff1aSopenharmony_ci ptrdiff_t line_size, int h) 1514cabdff1aSopenharmony_ci{ 1515cabdff1aSopenharmony_ci common_hv_bil_and_aver_dst_4w_msa(pixels, line_size, block, line_size, h); 1516cabdff1aSopenharmony_ci} 1517