1/* 2 * Loongson SIMD optimized pixblockdsp 3 * 4 * Copyright (c) 2015 Loongson Technology Corporation Limited 5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn> 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24#include "pixblockdsp_mips.h" 25#include "libavutil/mips/asmdefs.h" 26#include "libavutil/mips/mmiutils.h" 27 28void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels, 29 ptrdiff_t stride) 30{ 31 double ftmp[7]; 32 DECLARE_VAR_ALL64; 33 DECLARE_VAR_ADDRT; 34 35 __asm__ volatile ( 36 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 37 38 MMI_LDC1(%[ftmp1], %[pixels], 0x00) 39 MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) 40 "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 41 "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 42 "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 43 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 44 MMI_SDC1(%[ftmp3], %[block], 0x00) 45 MMI_SDC1(%[ftmp4], %[block], 0x08) 46 MMI_SDC1(%[ftmp5], %[block], 0x10) 47 MMI_SDC1(%[ftmp6], %[block], 0x18) 48 PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" 49 50 MMI_LDC1(%[ftmp1], %[pixels], 0x00) 51 MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) 52 "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 53 "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 54 "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 55 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 56 MMI_SDC1(%[ftmp3], %[block], 0x20) 57 MMI_SDC1(%[ftmp4], %[block], 0x28) 58 MMI_SDC1(%[ftmp5], %[block], 0x30) 59 MMI_SDC1(%[ftmp6], %[block], 0x38) 60 PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" 61 62 MMI_LDC1(%[ftmp1], %[pixels], 0x00) 63 MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) 64 "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 65 "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 66 "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 67 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 68 MMI_SDC1(%[ftmp3], %[block], 0x40) 69 MMI_SDC1(%[ftmp4], %[block], 0x48) 70 MMI_SDC1(%[ftmp5], %[block], 0x50) 71 MMI_SDC1(%[ftmp6], %[block], 0x58) 72 PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t" 73 74 MMI_LDC1(%[ftmp1], %[pixels], 0x00) 75 MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00) 76 "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" 77 "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" 78 "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" 79 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 80 MMI_SDC1(%[ftmp3], %[block], 0x60) 81 MMI_SDC1(%[ftmp4], %[block], 0x68) 82 MMI_SDC1(%[ftmp5], %[block], 0x70) 83 MMI_SDC1(%[ftmp6], %[block], 0x78) 84 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 85 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 86 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 87 [ftmp6]"=&f"(ftmp[6]), 88 RESTRICT_ASM_ALL64 89 RESTRICT_ASM_ADDRT 90 [pixels]"+&r"(pixels) 91 : [block]"r"((mips_reg)block), [stride]"r"((mips_reg)stride), 92 [stride_x2]"r"((mips_reg)(stride<<1)) 93 : "memory" 94 ); 95} 96 97void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1, 98 const uint8_t *src2, ptrdiff_t stride) 99{ 100 double ftmp[5]; 101 mips_reg tmp[1]; 102 DECLARE_VAR_ALL64; 103 104 __asm__ volatile ( 105 "li %[tmp0], 0x08 \n\t" 106 "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t" 107 "1: \n\t" 108 MMI_LDC1(%[ftmp0], %[src1], 0x00) 109 "por %[ftmp1], %[ftmp0], %[ftmp0] \n\t" 110 MMI_LDC1(%[ftmp2], %[src2], 0x00) 111 "por %[ftmp3], %[ftmp2], %[ftmp2] \n\t" 112 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" 113 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t" 114 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" 115 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" 116 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t" 117 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" 118 MMI_SDC1(%[ftmp0], %[block], 0x00) 119 MMI_SDC1(%[ftmp1], %[block], 0x08) 120 PTR_ADDI "%[tmp0], %[tmp0], -0x01 \n\t" 121 PTR_ADDIU "%[block], %[block], 0x10 \n\t" 122 PTR_ADDU "%[src1], %[src1], %[stride] \n\t" 123 PTR_ADDU "%[src2], %[src2], %[stride] \n\t" 124 "bgtz %[tmp0], 1b \n\t" 125 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 126 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 127 [ftmp4]"=&f"(ftmp[4]), 128 [tmp0]"=&r"(tmp[0]), 129 RESTRICT_ASM_ALL64 130 [block]"+&r"(block), [src1]"+&r"(src1), 131 [src2]"+&r"(src2) 132 : [stride]"r"((mips_reg)stride) 133 : "memory" 134 ); 135} 136