1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com) 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavutil/mips/generic_macros_msa.h" 22cabdff1aSopenharmony_ci#include "pixblockdsp_mips.h" 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_cistatic void diff_pixels_msa(int16_t *block, const uint8_t *src1, 25cabdff1aSopenharmony_ci const uint8_t *src2, int32_t stride) 26cabdff1aSopenharmony_ci{ 27cabdff1aSopenharmony_ci v16u8 in10, in11, in12, in13, in14, in15, in16, in17; 28cabdff1aSopenharmony_ci v16u8 in20, in21, in22, in23, in24, in25, in26, in27; 29cabdff1aSopenharmony_ci v8i16 out0, out1, out2, out3, out4, out5, out6, out7; 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_ci LD_UB8(src1, stride, in10, in11, in12, in13, in14, in15, in16, in17); 32cabdff1aSopenharmony_ci LD_UB8(src2, stride, in20, in21, in22, in23, in24, in25, in26, in27); 33cabdff1aSopenharmony_ci ILVR_B4_SH(in10, in20, in11, in21, in12, in22, in13, in23, 34cabdff1aSopenharmony_ci out0, out1, out2, out3); 35cabdff1aSopenharmony_ci ILVR_B4_SH(in14, in24, in15, in25, in16, in26, in17, in27, 36cabdff1aSopenharmony_ci out4, out5, out6, out7); 37cabdff1aSopenharmony_ci HSUB_UB4_SH(out0, out1, out2, out3, out0, out1, out2, out3); 38cabdff1aSopenharmony_ci HSUB_UB4_SH(out4, out5, out6, out7, out4, out5, out6, out7); 39cabdff1aSopenharmony_ci ST_SH8(out0, out1, out2, out3, out4, out5, out6, out7, block, 8); 40cabdff1aSopenharmony_ci} 41cabdff1aSopenharmony_ci 42cabdff1aSopenharmony_cistatic void copy_8bit_to_16bit_width8_msa(const uint8_t *src, int32_t src_stride, 43cabdff1aSopenharmony_ci int16_t *dst, int32_t dst_stride, 44cabdff1aSopenharmony_ci int32_t height) 45cabdff1aSopenharmony_ci{ 46cabdff1aSopenharmony_ci uint8_t *dst_ptr; 47cabdff1aSopenharmony_ci int32_t cnt; 48cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3; 49cabdff1aSopenharmony_ci v16i8 zero = { 0 }; 50cabdff1aSopenharmony_ci 51cabdff1aSopenharmony_ci dst_ptr = (uint8_t *) dst; 52cabdff1aSopenharmony_ci 53cabdff1aSopenharmony_ci for (cnt = (height >> 2); cnt--;) { 54cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 55cabdff1aSopenharmony_ci src += (4 * src_stride); 56cabdff1aSopenharmony_ci 57cabdff1aSopenharmony_ci ILVR_B4_UB(zero, src0, zero, src1, zero, src2, zero, src3, 58cabdff1aSopenharmony_ci src0, src1, src2, src3); 59cabdff1aSopenharmony_ci 60cabdff1aSopenharmony_ci ST_UB4(src0, src1, src2, src3, dst_ptr, (dst_stride * 2)); 61cabdff1aSopenharmony_ci dst_ptr += (4 * 2 * dst_stride); 62cabdff1aSopenharmony_ci } 63cabdff1aSopenharmony_ci} 64cabdff1aSopenharmony_ci 65cabdff1aSopenharmony_cistatic void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, 66cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 67cabdff1aSopenharmony_ci int32_t height, int32_t width) 68cabdff1aSopenharmony_ci{ 69cabdff1aSopenharmony_ci int32_t cnt, loop_cnt; 70cabdff1aSopenharmony_ci const uint8_t *src_tmp; 71cabdff1aSopenharmony_ci uint8_t *dst_tmp; 72cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 73cabdff1aSopenharmony_ci 74cabdff1aSopenharmony_ci for (cnt = (width >> 4); cnt--;) { 75cabdff1aSopenharmony_ci src_tmp = src; 76cabdff1aSopenharmony_ci dst_tmp = dst; 77cabdff1aSopenharmony_ci 78cabdff1aSopenharmony_ci for (loop_cnt = (height >> 3); loop_cnt--;) { 79cabdff1aSopenharmony_ci LD_UB8(src_tmp, src_stride, 80cabdff1aSopenharmony_ci src0, src1, src2, src3, src4, src5, src6, src7); 81cabdff1aSopenharmony_ci src_tmp += (8 * src_stride); 82cabdff1aSopenharmony_ci 83cabdff1aSopenharmony_ci ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, 84cabdff1aSopenharmony_ci dst_tmp, dst_stride); 85cabdff1aSopenharmony_ci dst_tmp += (8 * dst_stride); 86cabdff1aSopenharmony_ci } 87cabdff1aSopenharmony_ci 88cabdff1aSopenharmony_ci src += 16; 89cabdff1aSopenharmony_ci dst += 16; 90cabdff1aSopenharmony_ci } 91cabdff1aSopenharmony_ci} 92cabdff1aSopenharmony_ci 93cabdff1aSopenharmony_cistatic void copy_width16_msa(const uint8_t *src, int32_t src_stride, 94cabdff1aSopenharmony_ci uint8_t *dst, int32_t dst_stride, 95cabdff1aSopenharmony_ci int32_t height) 96cabdff1aSopenharmony_ci{ 97cabdff1aSopenharmony_ci int32_t cnt; 98cabdff1aSopenharmony_ci v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 99cabdff1aSopenharmony_ci 100cabdff1aSopenharmony_ci if (0 == height % 12) { 101cabdff1aSopenharmony_ci for (cnt = (height / 12); cnt--;) { 102cabdff1aSopenharmony_ci LD_UB8(src, src_stride, 103cabdff1aSopenharmony_ci src0, src1, src2, src3, src4, src5, src6, src7); 104cabdff1aSopenharmony_ci src += (8 * src_stride); 105cabdff1aSopenharmony_ci ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, 106cabdff1aSopenharmony_ci dst, dst_stride); 107cabdff1aSopenharmony_ci dst += (8 * dst_stride); 108cabdff1aSopenharmony_ci 109cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 110cabdff1aSopenharmony_ci src += (4 * src_stride); 111cabdff1aSopenharmony_ci ST_UB4(src0, src1, src2, src3, dst, dst_stride); 112cabdff1aSopenharmony_ci dst += (4 * dst_stride); 113cabdff1aSopenharmony_ci } 114cabdff1aSopenharmony_ci } else if (0 == height % 8) { 115cabdff1aSopenharmony_ci copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); 116cabdff1aSopenharmony_ci } else if (0 == height % 4) { 117cabdff1aSopenharmony_ci for (cnt = (height >> 2); cnt--;) { 118cabdff1aSopenharmony_ci LD_UB4(src, src_stride, src0, src1, src2, src3); 119cabdff1aSopenharmony_ci src += (4 * src_stride); 120cabdff1aSopenharmony_ci 121cabdff1aSopenharmony_ci ST_UB4(src0, src1, src2, src3, dst, dst_stride); 122cabdff1aSopenharmony_ci dst += (4 * dst_stride); 123cabdff1aSopenharmony_ci } 124cabdff1aSopenharmony_ci } 125cabdff1aSopenharmony_ci} 126cabdff1aSopenharmony_ci 127cabdff1aSopenharmony_civoid ff_get_pixels_16_msa(int16_t *av_restrict dest, const uint8_t *src, 128cabdff1aSopenharmony_ci ptrdiff_t stride) 129cabdff1aSopenharmony_ci{ 130cabdff1aSopenharmony_ci copy_width16_msa(src, stride, (uint8_t *) dest, 16, 8); 131cabdff1aSopenharmony_ci} 132cabdff1aSopenharmony_ci 133cabdff1aSopenharmony_civoid ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src, 134cabdff1aSopenharmony_ci ptrdiff_t stride) 135cabdff1aSopenharmony_ci{ 136cabdff1aSopenharmony_ci copy_8bit_to_16bit_width8_msa(src, stride, dest, 8, 8); 137cabdff1aSopenharmony_ci} 138cabdff1aSopenharmony_ci 139cabdff1aSopenharmony_civoid ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1, 140cabdff1aSopenharmony_ci const uint8_t *src2, ptrdiff_t stride) 141cabdff1aSopenharmony_ci{ 142cabdff1aSopenharmony_ci diff_pixels_msa(block, src1, src2, stride); 143cabdff1aSopenharmony_ci} 144