1cabdff1aSopenharmony_ci;******************************************************************************
2cabdff1aSopenharmony_ci;* SIMD-optimized fullpel functions
3cabdff1aSopenharmony_ci;* Copyright (c) 2008 Loren Merritt
4cabdff1aSopenharmony_ci;* Copyright (c) 2003-2013 Michael Niedermayer
5cabdff1aSopenharmony_ci;* Copyright (c) 2013 Daniel Kang
6cabdff1aSopenharmony_ci;*
7cabdff1aSopenharmony_ci;* This file is part of FFmpeg.
8cabdff1aSopenharmony_ci;*
9cabdff1aSopenharmony_ci;* FFmpeg is free software; you can redistribute it and/or
10cabdff1aSopenharmony_ci;* modify it under the terms of the GNU Lesser General Public
11cabdff1aSopenharmony_ci;* License as published by the Free Software Foundation; either
12cabdff1aSopenharmony_ci;* version 2.1 of the License, or (at your option) any later version.
13cabdff1aSopenharmony_ci;*
14cabdff1aSopenharmony_ci;* FFmpeg is distributed in the hope that it will be useful,
15cabdff1aSopenharmony_ci;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16cabdff1aSopenharmony_ci;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17cabdff1aSopenharmony_ci;* Lesser General Public License for more details.
18cabdff1aSopenharmony_ci;*
19cabdff1aSopenharmony_ci;* You should have received a copy of the GNU Lesser General Public
20cabdff1aSopenharmony_ci;* License along with FFmpeg; if not, write to the Free Software
21cabdff1aSopenharmony_ci;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22cabdff1aSopenharmony_ci;******************************************************************************
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci%include "libavutil/x86/x86util.asm"
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ciSECTION .text
27cabdff1aSopenharmony_ci
; PAVGB_MMX a, b, tmp, mask
; Byte-wise average assembled from logical, shift and subtract instructions.
; OP_PIXELS expands it in its notcpuflag(mmxext) branch in place of pavgb.
;   arg 1: first input; read by LOAD and again by pxor (OP_PIXELS passes a
;          memory operand here)
;   arg 2: register holding the second input; overwritten
;   arg 3: scratch register; overwritten
;   arg 4: mask applied before the shift (OP_PIXELS passes m6, which it fills
;          with 0xFE in every byte)
; Computes (a | b) - (((a ^ b) & mask) >> 1). With a mask of 0xFE bytes this
; equals (a + b + 1) >> 1 for every byte, i.e. the average rounded up.
; LOAD is not defined here; it must be %define'd before the macro is expanded
; (OP_PIXELS does so).
28cabdff1aSopenharmony_ci%macro PAVGB_MMX 4
29cabdff1aSopenharmony_ci    LOAD   %3, %1
; tmp = a | b
30cabdff1aSopenharmony_ci    por    %3, %2
; b = a ^ b
31cabdff1aSopenharmony_ci    pxor   %2, %1
; psrlq below shifts the whole 64-bit lane, not individual bytes; masking
; first keeps a byte's low bit from sliding into the top of its neighbour.
32cabdff1aSopenharmony_ci    pand   %2, %4
33cabdff1aSopenharmony_ci    psrlq  %2, 1
; tmp = (a | b) - ((a ^ b) >> 1), per byte
34cabdff1aSopenharmony_ci    psubb  %3, %2
; SWAP comes from the included x86 helper macros; presumably it exchanges the
; register names so the result in tmp is afterwards addressed through the
; name given as arg 2 -- TODO confirm against x86inc.asm.
35cabdff1aSopenharmony_ci    SWAP   %2, %3
36cabdff1aSopenharmony_ci%endmacro
37cabdff1aSopenharmony_ci
38cabdff1aSopenharmony_ci; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
39cabdff1aSopenharmony_ci;                        ptrdiff_t line_size, int h)
; OP_PIXELS op, width
; Emits one function named <op>_pixels<width> through cglobal.
;   op:    put or avg. For avg the loaded rows are averaged with the bytes
;          already at the destination before being stored; any other value
;          stores the loaded rows unchanged.
;   width: bytes handled per row; also appended to the function name.
; Register use inside the body: r0 is the store address, r1 the load address,
; r2 the step between rows, r3d the remaining row count, r4 = 3 * r2.
; Rows are processed four at a time and the loop only exits when r3d reaches
; exactly zero, so the row count has to be a positive multiple of 4.
40cabdff1aSopenharmony_ci%macro OP_PIXELS 2
; Width equal to half a vector register: use movh for loads and stores and set
; LEN to mmsize so that the %rep below runs exactly once.
41cabdff1aSopenharmony_ci%if %2 == mmsize/2
42cabdff1aSopenharmony_ci%define LOAD movh
43cabdff1aSopenharmony_ci%define SAVE movh
44cabdff1aSopenharmony_ci%define LEN  mmsize
45cabdff1aSopenharmony_ci%else
; Otherwise load with movu, store with mova, and cover the row in
; width/mmsize register-sized chunks.
; NOTE(review): mova is presumably the aligned move from x86inc, which would
; require the destination rows to be mmsize-aligned -- confirm.
46cabdff1aSopenharmony_ci%define LOAD movu
47cabdff1aSopenharmony_ci%define SAVE mova
48cabdff1aSopenharmony_ci%define LEN  %2
49cabdff1aSopenharmony_ci%endif
; cglobal arguments (x86inc convention, TODO confirm): 4 function arguments,
; 5 general-purpose registers, 4 vector registers.
; NOTE(review): the notcpuflag(mmxext) avg path below also touches m4, m5 and
; m6. In this file that path is only instantiated under INIT_MMX; confirm it
; can never be selected for an XMM build, where the declared vector register
; count presumably matters.
50cabdff1aSopenharmony_cicglobal %1_pixels%2, 4,5,4
; r4 = 3 * r2, the offset of the fourth row of each group.
51cabdff1aSopenharmony_ci    lea          r4, [r2*3]
52cabdff1aSopenharmony_ci%ifidn %1, avg
53cabdff1aSopenharmony_ci%if notcpuflag(mmxext)
; Build the PAVGB_MMX mask once, outside the loop: all bits set, then every
; byte added to itself (0xFF + 0xFF wraps to 0xFE).
54cabdff1aSopenharmony_ci    pcmpeqd      m6, m6
55cabdff1aSopenharmony_ci    paddb        m6, m6
56cabdff1aSopenharmony_ci%endif
57cabdff1aSopenharmony_ci%endif
58cabdff1aSopenharmony_ci.loop:
; The macro-local counter is the byte offset of the current chunk in the row.
59cabdff1aSopenharmony_ci%assign %%i 0
60cabdff1aSopenharmony_ci%rep LEN/mmsize
; Load the same chunk from four consecutive source rows.
61cabdff1aSopenharmony_ci    LOAD         m0, [r1 + %%i]
62cabdff1aSopenharmony_ci    LOAD         m1, [r1+r2 + %%i]
63cabdff1aSopenharmony_ci    LOAD         m2, [r1+r2*2 + %%i]
64cabdff1aSopenharmony_ci    LOAD         m3, [r1+r4 + %%i]
65cabdff1aSopenharmony_ci%ifidn %1, avg
66cabdff1aSopenharmony_ci%if notcpuflag(mmxext)
; No pavgb available: average each row with the destination via PAVGB_MMX,
; alternating m4 and m5 as scratch and using the mask prepared in m6.
67cabdff1aSopenharmony_ci    PAVGB_MMX    [r0 + %%i], m0, m4, m6
68cabdff1aSopenharmony_ci    PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
69cabdff1aSopenharmony_ci    PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
70cabdff1aSopenharmony_ci    PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
71cabdff1aSopenharmony_ci%else
; NOTE(review): pavgb takes the destination as a full-register-width memory
; operand, so in the movh case (width == mmsize/2) it still reads mmsize bytes
; from the destination row -- confirm callers tolerate the wider read.
72cabdff1aSopenharmony_ci    pavgb        m0, [r0 + %%i]
73cabdff1aSopenharmony_ci    pavgb        m1, [r0+r2 + %%i]
74cabdff1aSopenharmony_ci    pavgb        m2, [r0+r2*2 + %%i]
75cabdff1aSopenharmony_ci    pavgb        m3, [r0+r4 + %%i]
76cabdff1aSopenharmony_ci%endif
77cabdff1aSopenharmony_ci%endif
; Store the four rows of this chunk to the destination.
78cabdff1aSopenharmony_ci    SAVE       [r0 + %%i], m0
79cabdff1aSopenharmony_ci    SAVE    [r0+r2 + %%i], m1
80cabdff1aSopenharmony_ci    SAVE  [r0+r2*2 + %%i], m2
81cabdff1aSopenharmony_ci    SAVE    [r0+r4 + %%i], m3
82cabdff1aSopenharmony_ci%assign %%i %%i+mmsize
83cabdff1aSopenharmony_ci%endrep
; Four rows done. The two lea instructions advance both pointers by four rows
; and leave the flags untouched, so jne still tests the result of the sub.
84cabdff1aSopenharmony_ci    sub         r3d, 4
85cabdff1aSopenharmony_ci    lea          r1, [r1+r2*4]
86cabdff1aSopenharmony_ci    lea          r0, [r0+r2*4]
87cabdff1aSopenharmony_ci    jne       .loop
88cabdff1aSopenharmony_ci    RET
89cabdff1aSopenharmony_ci%endmacro
90cabdff1aSopenharmony_ci
; Instantiations of OP_PIXELS. Each INIT_* line applies to the OP_PIXELS lines
; that follow it; INIT_MMX / INIT_XMM come from the included x86 helper macros
; and presumably set mmsize (8 / 16 bytes) and the cpu flags that
; cpuflag()/notcpuflag() test -- TODO confirm against x86inc.asm.
;
; Plain MMX: put for widths 4, 8 and 16, plus avg for width 16 only. With no
; mmxext flag, this avg takes the PAVGB_MMX path inside OP_PIXELS.
91cabdff1aSopenharmony_ciINIT_MMX mmx
92cabdff1aSopenharmony_ciOP_PIXELS put, 4
93cabdff1aSopenharmony_ciOP_PIXELS put, 8
94cabdff1aSopenharmony_ciOP_PIXELS put, 16
95cabdff1aSopenharmony_ciOP_PIXELS avg, 16
96cabdff1aSopenharmony_ci
; MMXEXT: avg for widths 4, 8 and 16 using the pavgb path. No put variants are
; instantiated for this flag set.
97cabdff1aSopenharmony_ciINIT_MMX mmxext
98cabdff1aSopenharmony_ciOP_PIXELS avg, 4
99cabdff1aSopenharmony_ciOP_PIXELS avg, 8
100cabdff1aSopenharmony_ciOP_PIXELS avg, 16
101cabdff1aSopenharmony_ci
; SSE2: XMM-register put and avg for width 16.
102cabdff1aSopenharmony_ciINIT_XMM sse2
103cabdff1aSopenharmony_ciOP_PIXELS put, 16
104cabdff1aSopenharmony_ciOP_PIXELS avg, 16
105