;******************************************************************************
;* SIMD-optimized HuffYUV encoding functions
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

%include "libavcodec/x86/huffyuvdsp_template.asm"

;------------------------------------------------------------------------------
; void ff_diff_int16(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
;                    unsigned mask, int w);
;------------------------------------------------------------------------------

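; A minimal C sketch of the behaviour DIFF_INT16 implements, assuming the
; prototype above (the scalar reference lives in libavcodec/huffyuvencdsp.c;
; the function name below is illustrative, not FFmpeg's):
;
;     #include <stdint.h>
;
;     static void diff_int16_sketch(uint16_t *dst, const uint16_t *src1,
;                                   const uint16_t *src2, unsigned mask, int w)
;     {
;         /* Element-wise difference of two rows of 16-bit samples, wrapped
;          * to the sample bit depth by `mask` (e.g. 0x03FF for 10-bit). */
;         for (int i = 0; i < w; i++)
;             dst[i] = (src1[i] - src2[i]) & mask;
;     }
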
%macro DIFF_INT16 0
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
; Take the fast path only when dst, src1 and src2 are all mmsize-aligned;
; INT16_LOOP (from huffyuvdsp_template.asm) contains its own RET, so the
; aligned path does not fall through into the unaligned one.
    test src1q, mmsize-1
    jnz .unaligned
    test src2q, mmsize-1
    jnz .unaligned
    test dstq, mmsize-1
    jnz .unaligned
    INT16_LOOP a, sub ; aligned loads/stores
.unaligned:
    INT16_LOOP u, sub ; unaligned loads/stores
%endmacro

INIT_XMM sse2
DIFF_INT16

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
DIFF_INT16
%endif

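;------------------------------------------------------------------------------
; void ff_sub_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *src1,
;                                    const uint16_t *src2, unsigned mask,
;                                    int w, int *left, int *left_top);
;
; Prototype inferred from the cglobal declaration below, with argument types
; assumed to match ff_diff_int16 above; added to follow the comment-box style
; used for the other function in this file.
;------------------------------------------------------------------------------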
INIT_MMX mmxext
cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
    add      wd, wd              ; width in samples -> width in bytes
    movd    mm7, maskd
    SPLATW  mm7, mm7             ; mm7 = mask broadcast to all four words
    ; Build the initial "topleft" (mm0) and "left" (mm2) vectors: shift the
    ; first four samples of each row up by one word and splice *left_top /
    ; *left into the low lane.
    movq    mm0, [src1q]
    movq    mm2, [src2q]
    psllq   mm0, 16
    psllq   mm2, 16
    movd    mm6, [left_topq]
    por     mm0, mm6
    movd    mm6, [leftq]
    por     mm2, mm6
    xor     maskq, maskq         ; maskq is reused as the byte offset
.loop:
    movq    mm1, [src1q + maskq] ; top
    movq    mm3, [src2q + maskq] ; current samples
    movq    mm4, mm2             ; keep left for the min/max network below
    psubw   mm2, mm0
    paddw   mm2, mm1             ; gradient = left + top - topleft
    pand    mm2, mm7
    ; median(left, top, gradient) =
    ;     max(min(max(left, top), gradient), min(left, top))
    movq    mm5, mm4
    pmaxsw  mm4, mm1
    pminsw  mm1, mm5
    pminsw  mm4, mm2
    pmaxsw  mm4, mm1             ; mm4 = predicted values
    psubw   mm3, mm4
    pand    mm3, mm7             ; residual = (current - pred) & mask
    movq    [dstq + maskq], mm3
    add     maskq, 8
    movq    mm0, [src1q + maskq - 2] ; next topleft/left vectors: the same
    movq    mm2, [src2q + maskq - 2] ; rows, shifted back by one sample
    cmp     maskq, wq
    jb .loop
    ; Write back the last top/current samples so the caller can chain rows.
    movzx maskd, word [src1q + wq - 2]
    mov [left_topq], maskd
    movzx maskd, word [src2q + wq - 2]
    mov [leftq], maskd
    RET

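; A hedged C sketch of the per-sample behaviour of the routine above, assuming
; the prototype given before it (the reference implementation lives in
; libavcodec/huffyuvencdsp.c; names with a _sketch suffix are illustrative):
;
;     #include <stdint.h>
;
;     /* Median of three, cf. FFmpeg's mid_pred() in libavutil. */
;     static int mid_pred_sketch(int a, int b, int c)
;     {
;         if (a > b) { int t = a; a = b; b = t; }  /* now a <= b */
;         return c < a ? a : (c > b ? b : c);
;     }
;
;     static void sub_hfyu_median_pred_int16_sketch(uint16_t *dst,
;             const uint16_t *src1, const uint16_t *src2, unsigned mask,
;             int w, int *left, int *left_top)
;     {
;         int l = *left, lt = *left_top; /* carried in from the previous call */
;         for (int i = 0; i < w; i++) {
;             int pred = mid_pred_sketch(l, src1[i],
;                                        (l + src1[i] - lt) & mask);
;             dst[i]   = (src2[i] - pred) & mask; /* residual, wrapped */
;             lt = src1[i];  /* this top becomes the next sample's topleft */
;             l  = src2[i];  /* this sample becomes the next sample's left */
;         }
;         *left     = l;     /* persist state so rows can be chained */
;         *left_top = lt;
;     }
;
; Calling the function once per row, with src1 supplying the row above and
; src2 the current row, keeps *left / *left_top consistent across the frame.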