/*
 * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/mips/generic_macros_msa.h"
#include "idctdsp_mips.h"

/* Store an 8x8 block of 16-bit values as 8-bit pixels, clamping each
 * element to the range [0, 255]. */
static void put_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
                                   int32_t stride)
{
    uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

    /* Load the eight rows of the block, one v8i16 vector per row. */
    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
    CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7);
    /* Pack the even (low) bytes of each halfword vector into bytes;
     * each row's 8 pixels end up in the low doubleword of its vector. */
    PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
    PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);

    in0_d = __msa_copy_u_d((v2i64) in0, 0);
    in1_d = __msa_copy_u_d((v2i64) in1, 0);
    in2_d = __msa_copy_u_d((v2i64) in2, 0);
    in3_d = __msa_copy_u_d((v2i64) in3, 0);
    in4_d = __msa_copy_u_d((v2i64) in4, 0);
    in5_d = __msa_copy_u_d((v2i64) in5, 0);
    in6_d = __msa_copy_u_d((v2i64) in6, 0);
    in7_d = __msa_copy_u_d((v2i64) in7, 0);
    SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
    pixels += 4 * stride;
    SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
}

/* Like put_pixels_clamped_msa(), but for signed input: bias each value
 * by 128 before clamping to [0, 255]. */
static void put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
                                          int32_t stride)
{
    uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);

    in0 += 128;
    in1 += 128;
    in2 += 128;
    in3 += 128;
    in4 += 128;
    in5 += 128;
    in6 += 128;
    in7 += 128;

    CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7);
    PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
    PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);

    in0_d = __msa_copy_u_d((v2i64) in0, 0);
    in1_d = __msa_copy_u_d((v2i64) in1, 0);
    in2_d = __msa_copy_u_d((v2i64) in2, 0);
    in3_d = __msa_copy_u_d((v2i64) in3, 0);
    in4_d = __msa_copy_u_d((v2i64) in4, 0);
    in5_d = __msa_copy_u_d((v2i64) in5, 0);
    in6_d = __msa_copy_u_d((v2i64) in6, 0);
    in7_d = __msa_copy_u_d((v2i64) in7, 0);
    SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
    pixels += 4 * stride;
    SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
}

/* Add an 8x8 block of 16-bit residuals to the existing destination
 * pixels, clamping the sums to [0, 255]. */
static void add_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
                                   int32_t stride)
{
    uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
    v16u8 pix_in0, pix_in1, pix_in2, pix_in3;
    v16u8 pix_in4, pix_in5, pix_in6, pix_in7;
    v8u16 pix0, pix1, pix2, pix3, pix4, pix5, pix6, pix7;
    v8i16 zero = { 0 };

    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
    LD_UB8(pixels, stride, pix_in0, pix_in1, pix_in2, pix_in3,
           pix_in4, pix_in5, pix_in6, pix_in7);

    /* Interleave with zero to widen the destination pixels from
     * unsigned 8-bit to unsigned 16-bit. */
    ILVR_B4_UH(zero, pix_in0, zero, pix_in1, zero, pix_in2, zero, pix_in3,
               pix0, pix1, pix2, pix3);
    ILVR_B4_UH(zero, pix_in4, zero, pix_in5, zero, pix_in6, zero, pix_in7,
               pix4, pix5, pix6, pix7);

    in0 += (v8i16) pix0;
    in1 += (v8i16) pix1;
    in2 += (v8i16) pix2;
    in3 += (v8i16) pix3;
    in4 += (v8i16) pix4;
    in5 += (v8i16) pix5;
    in6 += (v8i16) pix6;
    in7 += (v8i16) pix7;

    CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7);
    PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
    PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);

    in0_d = __msa_copy_u_d((v2i64) in0, 0);
    in1_d = __msa_copy_u_d((v2i64) in1, 0);
    in2_d = __msa_copy_u_d((v2i64) in2, 0);
    in3_d = __msa_copy_u_d((v2i64) in3, 0);
    in4_d = __msa_copy_u_d((v2i64) in4, 0);
    in5_d = __msa_copy_u_d((v2i64) in5, 0);
    in6_d = __msa_copy_u_d((v2i64) in6, 0);
    in7_d = __msa_copy_u_d((v2i64) in7, 0);
    SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
    pixels += 4 * stride;
    SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
}

/* Exported wrappers matching the idctdsp prototypes, which take the
 * line size as ptrdiff_t. */
void ff_put_pixels_clamped_msa(const int16_t *block,
                               uint8_t *av_restrict pixels,
                               ptrdiff_t line_size)
{
    put_pixels_clamped_msa(block, pixels, line_size);
}

void ff_put_signed_pixels_clamped_msa(const int16_t *block,
                                      uint8_t *av_restrict pixels,
                                      ptrdiff_t line_size)
{
    put_signed_pixels_clamped_msa(block, pixels, line_size);
}

void ff_add_pixels_clamped_msa(const int16_t *block,
                               uint8_t *av_restrict pixels,
                               ptrdiff_t line_size)
{
    add_pixels_clamped_msa(block, pixels, line_size);
}