/*
 * Loongson SIMD optimized idctdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "idctdsp_mips.h"
#include "constants.h"
#include "libavutil/mips/mmiutils.h"

/**
 * Write an 8x8 block of 16-bit coefficients to an 8-bit pixel area.
 *
 * The routine issues sixteen 8-byte loads from block (byte offsets
 * 0x00-0x78, i.e. 64 int16_t values), narrows each pair of loads into one
 * 8-byte register with packushb (pack halfwords to bytes with unsigned
 * saturation) and stores eight 8-byte rows through pixels, stepping the
 * destination by line_size bytes between rows.
 *
 * MMI_LDC1 / MMI_SDC1 / PTR_ADDU are provided by mmiutils.h; they are used
 * here as "load 8 bytes into an FP register", "store 8 bytes from an FP
 * register" and "pointer-width add" respectively (definitions not visible
 * in this file -- verify there, including any alignment requirement).
 *
 * @param block     source coefficients, 64 consecutive int16_t values
 * @param pixels    address of the first destination row (8 bytes per row)
 * @param line_size distance in bytes between consecutive destination rows
 */
void ff_put_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    /* Backing storage for the eight FP scratch registers named in the asm. */
    double ftmp[8];

    __asm__ volatile (
        /* First half: coefficients for rows 0-3 (two loads per row). */
        MMI_LDC1(%[ftmp0], %[block], 0x00)
        MMI_LDC1(%[ftmp1], %[block], 0x08)
        MMI_LDC1(%[ftmp2], %[block], 0x10)
        MMI_LDC1(%[ftmp3], %[block], 0x18)
        MMI_LDC1(%[ftmp4], %[block], 0x20)
        MMI_LDC1(%[ftmp5], %[block], 0x28)
        MMI_LDC1(%[ftmp6], %[block], 0x30)
        MMI_LDC1(%[ftmp7], %[block], 0x38)
        /* Narrow each row: two registers of halfwords -> one of bytes. */
        "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
        /* Store rows 0-3; pixels is advanced after every row, including
         * the fourth, so it points at row 4 for the second half. */
        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"

        /* Second half: same sequence for rows 4-7 (offsets 0x40-0x78). */
        MMI_LDC1(%[ftmp0], %[block], 0x40)
        MMI_LDC1(%[ftmp1], %[block], 0x48)
        MMI_LDC1(%[ftmp2], %[block], 0x50)
        MMI_LDC1(%[ftmp3], %[block], 0x58)
        MMI_LDC1(%[ftmp4], %[block], 0x60)
        MMI_LDC1(%[ftmp5], %[block], 0x68)
        MMI_LDC1(%[ftmp6], %[block], 0x70)
        MMI_LDC1(%[ftmp7], %[block], 0x78)
        "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        /* Last row: no pointer advance afterwards. */
        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
        /* Outputs: FP scratch registers are early-clobber ("=&f"); pixels is
         * read-write ("+&r") because the asm advances it. */
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [pixels]"+&r"(pixels)
        /* Inputs: block is only read and never modified in this routine. */
        : [line_size]"r"((mips_reg)line_size),
          [block]"r"(block)
        /* The asm reads and writes memory that is not listed as an operand. */
        : "memory"
    );
}

/**
 * Write an 8x8 block of signed 16-bit coefficients to an 8-bit pixel area,
 * applying a per-byte bias taken from ff_pb_80.
 *
 * Each row is built from two 8-byte loads that are narrowed with packsshb
 * (pack halfwords to bytes with signed saturation); paddb then adds the
 * ff_pb_80 operand to every byte before the row is stored. ff_pb_80 comes
 * from constants.h -- presumably eight 0x80 bytes, which would map the
 * signed range -128..127 onto 0..255; confirm against its definition.
 *
 * @param block     source coefficients, 64 consecutive int16_t values
 * @param pixels    address of the first destination row (8 bytes per row)
 * @param line_size distance in bytes between consecutive destination rows
 */
void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    /* ftmp[0] is a shared scratch register; ftmp[1..4] hold one row each. */
    double ftmp[5];

    __asm__ volatile (
        /* Rows 0-3: load both halves of a row, pack into ftmp1..ftmp4.
         * ftmp0 is reloaded for the second half of every row. */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp0], %[block], 0x08)
        "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp2], %[block], 0x10)
        MMI_LDC1(%[ftmp0], %[block], 0x18)
        "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp3], %[block], 0x20)
        MMI_LDC1(%[ftmp0], %[block], 0x28)
        "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp4], %[block], 0x30)
        MMI_LDC1(%[ftmp0], %[block], 0x38)
        "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        /* Add the ff_pb_80 bias to every byte of the four packed rows. */
        "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
        "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
        "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
        "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
        /* Store rows 0-3, leaving pixels at row 4. */
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"

        /* Rows 4-7: identical sequence on block offsets 0x40-0x78. */
        MMI_LDC1(%[ftmp1], %[block], 0x40)
        MMI_LDC1(%[ftmp0], %[block], 0x48)
        "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp2], %[block], 0x50)
        MMI_LDC1(%[ftmp0], %[block], 0x58)
        "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp3], %[block], 0x60)
        MMI_LDC1(%[ftmp0], %[block], 0x68)
        "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp4], %[block], 0x70)
        MMI_LDC1(%[ftmp0], %[block], 0x78)
        "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
        "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
        "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
        "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        /* Last row: no pointer advance afterwards. */
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        /* Outputs: early-clobber FP scratch; pixels is advanced in place. */
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),
          [pixels]"+&r"(pixels)
        /* Inputs: the bias constant is passed in an FP register ("f"). */
        : [block]"r"(block),
          [line_size]"r"((mips_reg)line_size),
          [ff_pb_80]"f"(ff_pb_80.f)
        /* The asm reads and writes memory that is not listed as an operand. */
        : "memory"
    );
}

/**
 * Add an 8x8 block of 16-bit coefficients to existing 8-bit pixels and
 * write the result back in place.
 *
 * The loop runs four times and handles two rows per iteration: the two
 * pixel rows are loaded and widened from bytes to halfwords by interleaving
 * with a zeroed register, the matching 16 coefficients are added with paddh
 * (16-bit add), and packushb (unsigned-saturating pack) narrows the sums
 * back to bytes before they are stored over the original pixels.
 *
 * @param block     source coefficients, 64 consecutive int16_t values
 * @param pixels    address of the first row to update (8 bytes per row)
 * @param line_size distance in bytes between consecutive rows
 */
void ff_add_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    /* ftmp[0] holds zero for the unpack steps; ftmp[1..8] are scratch. */
    double ftmp[9];
    /* Backing storage for the integer loop counter register. */
    uint64_t tmp[1];
    __asm__ volatile (
        /* Loop counter: 4 iterations x 2 rows = 8 rows. */
        "li %[tmp0], 0x04 \n\t"
        /* Zero ftmp0 (x XOR x); used as the high bytes when widening. */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "1: \n\t"
        /* Load the current row and the next one, then step pixels back so
         * the stores below start at the current row again. */
        MMI_LDC1(%[ftmp5], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_LDC1(%[ftmp6], %[pixels], 0x00)
        PTR_SUBU "%[pixels], %[pixels], %[line_size] \n\t"
        /* Load two rows of coefficients (4 x 8 bytes) and advance block. */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp2], %[block], 0x08)
        MMI_LDC1(%[ftmp3], %[block], 0x10)
        MMI_LDC1(%[ftmp4], %[block], 0x18)
        PTR_ADDIU "%[block], %[block], 0x20 \n\t"
        /* Widen pixel bytes to halfwords by interleaving with zero. The
         * high half must be extracted before the low half overwrites the
         * source register (ftmp5 / ftmp6). */
        "punpckhbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        /* coefficients + pixels: ftmp1/ftmp2 = first row (low/high half),
         * ftmp3/ftmp4 = second row (low/high half). */
        "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
        /* Narrow the sums back to one 8-byte register per row. */
        "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        /* Store both rows, decrement the counter, and leave pixels on the
         * first row of the next pair before looping. */
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        "addi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        "bnez %[tmp0], 1b \n\t"
        /* Outputs: early-clobber scratch registers; pixels and block are
         * read-write ("+&r") because the asm advances both pointers. */
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [tmp0]"=&r"(tmp[0]),
          [pixels]"+&r"(pixels), [block]"+&r"(block)
        : [line_size]"r"((mips_reg)line_size)
        /* The asm reads and writes memory that is not listed as an operand. */
        : "memory"
    );
}