;******************************************************************************
;* SIMD-optimized HuffYUV functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

%include "libavcodec/x86/huffyuvdsp_template.asm"

;------------------------------------------------------------------------------
; void (*add_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
;------------------------------------------------------------------------------

%macro ADD_INT16 0
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
    test        srcq, mmsize-1
    jnz         .unaligned
    test        dstq, mmsize-1
    jnz         .unaligned
    INT16_LOOP  a, add
.unaligned:
    INT16_LOOP  u, add
%endmacro

INIT_XMM sse2
ADD_INT16

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
ADD_INT16
%endif

; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
;                               intptr_t w, uint8_t *left)
INIT_XMM sse2
cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
    shl           wq, 2
    movd          m0, [leftq]
    lea         dstq, [dstq + wq]
    lea         srcq, [srcq + wq]
    LSHIFT        m0, mmsize-4
    neg           wq
.loop:
    movu          m1, [srcq+wq]
    mova          m2, m1
    LSHIFT        m1, 4
    paddb         m1, m2
    pshufd        m0, m0, q3333
    mova          m2, m1
    LSHIFT        m1, 8
    paddb         m1, m2
    paddb         m0, m1
    movu   [dstq+wq], m0
    add           wq, mmsize
    jl .loop
    movd          m0, [dstq-4]
    movd     [leftq], m0
    REP_RET


; void add_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *top,
;                                 const uint16_t *diff, unsigned mask,
;                                 int w, int *left, int *left_top)
INIT_MMX mmxext
cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
    add      wd, wd
    movd    mm6, maskd
    SPLATW  mm6, mm6
    movq    mm0, [topq]
    movq    mm2, mm0
    movd    mm4, [left_topq]
    psllq   mm2, 16
    movq    mm1, mm0
    por     mm4, mm2
    movd    mm3, [leftq]
    psubw   mm0, mm4 ; t-tl
    add    dstq, wq
    add    topq, wq
    add   diffq, wq
    neg      wq
    jmp .skip
.loop:
    movq    mm4, [topq+wq]
    movq    mm0, mm4
    psllq   mm4, 16
    por     mm4, mm1
    movq    mm1, mm0 ; t
    psubw   mm0, mm4 ; t-tl
.skip:
    movq    mm2, [diffq+wq]
%assign i 0
%rep 4
    movq    mm4, mm0
    paddw   mm4, mm3 ; t-tl+l
    pand    mm4, mm6
    movq    mm5, mm3
    pmaxsw  mm3, mm1
    pminsw  mm5, mm1
    pminsw  mm3, mm4
    pmaxsw  mm3, mm5 ; median
    paddw   mm3, mm2 ; +residual
    pand    mm3, mm6
%if i==0
    movq    mm7, mm3
    psllq   mm7, 48
%else
    movq    mm4, mm3
    psrlq   mm7, 16
    psllq   mm4, 48
    por     mm7, mm4
%endif
%if i<3
    psrlq   mm0, 16
    psrlq   mm1, 16
    psrlq   mm2, 16
%endif
%assign i i+1
%endrep
    movq [dstq+wq], mm7
    add      wq, 8
    jl .loop
    movzx   r2d, word [dstq-2]
    mov [leftq], r2d
    movzx   r2d, word [topq-2]
    mov [left_topq], r2d
    RET
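
; The MMXEXT loop above handles four 16-bit samples per iteration; for readers
; following the register shuffling, the commented C sketch below shows the
; scalar recurrence it implements: take the median of left, top, and
; left+top-topleft (masked to the sample depth), add the residual, and wrap
; into the mask. This is an illustrative reference only, not part of the
; build; the helper name add_hfyu_median_pred_int16_ref and its variable
; names are ours, while mid_pred() refers to FFmpeg's usual median-of-three
; helper.
;
;   static void add_hfyu_median_pred_int16_ref(uint16_t *dst, const uint16_t *top,
;                                              const uint16_t *diff, unsigned mask,
;                                              int w, int *left, int *left_top)
;   {
;       int l = *left, tl = *left_top;
;       for (int i = 0; i < w; i++) {
;           int t    = top[i];
;           int pred = mid_pred(l, t, (l + t - tl) & mask); // median predictor
;           l        = (pred + diff[i]) & mask;             // add residual, wrap
;           tl       = t;
;           dst[i]   = l;
;       }
;       *left     = l;                                      // last sample written
;       *left_top = tl;                                     // last top sample seen
;   }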