;******************************************************************************
;* SIMD-optimized quarterpel functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
23cabdff1aSopenharmony_ci
; Macro layer used throughout this file (cglobal, mova/movh, INIT_MMX, ...);
; presumably provided by this include -- its contents are not visible here.
%include "libavutil/x86/x86util.asm"

SECTION .text
27cabdff1aSopenharmony_ci
; op_avgh reg, mem, tmp
;   "avg" store using half-register moves (movh; a 4-byte movd when the
;   file is assembled under INIT_MMX -- defined by the x86inc layer).
;     %3 = movh load of %2          (current destination contents)
;     %1 = pavgb(%1, %3)            (rounded unsigned byte average)
;     %2 = movh store of %1
;   Clobbers %1 and the scratch register %3.
%macro op_avgh 3
    movh        %3, %2
    pavgb       %1, %3
    movh        %2, %1
%endmacro
33cabdff1aSopenharmony_ci
; op_avg reg, mem
;   "avg" store on a full register: %1 = pavgb(%1, %2), then %2 = %1.
;   Clobbers %1.
%macro op_avg 2
    pavgb       %1, %2
    mova        %2, %1
%endmacro
38cabdff1aSopenharmony_ci
; op_puth reg, mem [, tmp]
;   "put" store using a half-register move: %2 = movh store of %1.
;   The optional third operand is accepted and ignored, so the macro can be
;   invoked with the same operand list as op_avgh.
%macro op_puth 2-3
    movh        %2, %1
%endmacro
42cabdff1aSopenharmony_ci
; op_put reg, mem
;   "put" store on a full register: %2 = %1.
%macro op_put 2
    mova        %2, %1
%endmacro
46cabdff1aSopenharmony_ci
; void ff_put/avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
;                                   int dstStride, int src1Stride, int h)
;
; PIXELS4_L2 put|avg
;   For each of h rows: take 4 bytes from src1 and 4 bytes from src2, combine
;   them with pavgb and pass the result to OP (op_puth / op_avgh), which
;   writes 4 bytes to dst.
;
;   Register roles:
;     r0 = dst    r1 = src1    r2 = src2
;     r3 = dstStride, r4 = src1Stride (widened from their 32-bit forms by
;          movsxdifnidn)
;     r5d = rows still to do
;     m0/m1 = row accumulators, m2 = src2 scratch, m3 = scratch handed to OP
;
;   src2 is walked with a fixed 4-byte row pitch (+4 per row), src1 and dst
;   with their respective strides.
;
;   Row count: when h is odd one row is peeled off first; the loop then
;   consumes four rows per pass and only exits when the counter reaches
;   exactly zero, so h - (h & 1) must be a non-zero multiple of 4.
;
;   All source loads are 4 bytes wide (movd), matching the odd-row path, so
;   no bytes beyond the 4-pixel row are read from either source.
%macro PIXELS4_L2 1
%define OP op_%1h
cglobal %1_pixels4_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    ; odd h: handle a single leading row
    movd         m0, [r1]
    movd         m1, [r2]
    add          r1, r4
    add          r2, 4
    pavgb        m0, m1
    OP           m0, [r0], m3
    add          r0, r3
    dec         r5d
.loop:
    ; rows 0 and 1 of this group of four
    movd         m0, [r1]
    movd         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    movd         m2, [r2]
    pavgb        m0, m2
    movd         m2, [r2+4]
    pavgb        m1, m2
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]
    ; rows 2 and 3
    movd         m0, [r1]
    movd         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    movd         m2, [r2+8]
    pavgb        m0, m2
    movd         m2, [r2+12]
    pavgb        m1, m2
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]
    add          r2, 16             ; four src2 rows of 4 bytes each
    sub         r5d, 4              ; sets ZF for the loop test below
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS4_L2 put
PIXELS4_L2 avg
90cabdff1aSopenharmony_ci
; void ff_put/avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
;                                   int dstStride, int src1Stride, int h)
;
; PIXELS8_L2 put|avg
;   For each of h rows: pavgb one register-wide row of src1 with the matching
;   row of src2 and pass the result to OP (op_put / op_avg) for dst.
;
;   Register roles:
;     r0 = dst    r1 = src1    r2 = src2
;     r3 = dstStride, r4 = src1Stride (widened by movsxdifnidn)
;     r5d = rows still to do
;
;   src2 is walked with a fixed 8-byte row pitch; src1 and dst use their
;   strides.
;
;   Row count: an odd h peels one row first; the loop then consumes four rows
;   per pass and exits only on an exact zero, so h - (h & 1) must be a
;   non-zero multiple of 4.
%macro PIXELS8_L2 1
%define OP op_%1
cglobal %1_pixels8_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    ; odd h: single leading row
    mova         m0, [r1]
    mova         m1, [r2]
    pavgb        m0, m1
    OP           m0, [r0]
    add          r1, r4
    add          r2, 8
    add          r0, r3
    dec         r5d
.loop:
    ; rows 0 and 1: both rows are loaded before anything is stored
    mova         m0, [r1]
    pavgb        m0, [r2]
    mova         m1, [r1+r4]
    pavgb        m1, [r2+8]
    lea          r1, [r1+2*r4]
    OP           m0, [r0]
    OP           m1, [r0+r3]
    lea          r0, [r0+2*r3]
    ; rows 2 and 3
    mova         m0, [r1]
    pavgb        m0, [r2+16]
    mova         m1, [r1+r4]
    pavgb        m1, [r2+24]
    lea          r1, [r1+2*r4]
    add          r2, 32             ; four src2 rows of 8 bytes each
    OP           m0, [r0]
    OP           m1, [r0+r3]
    lea          r0, [r0+2*r3]      ; lea leaves flags alone; sub below sets them
    sub         r5d, 4
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS8_L2 put
PIXELS8_L2 avg
134cabdff1aSopenharmony_ci
; void ff_put/avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
;                                    int dstStride, int src1Stride, int h)
;
; PIXELS16_L2 put|avg
;   For each of h rows: pavgb a 16-byte row of src1 (handled as two 8-byte
;   halves at +0 and +8) with the matching row of src2 and pass both halves
;   to OP (op_put / op_avg) for dst.
;
;   Register roles:
;     r0 = dst    r1 = src1    r2 = src2
;     r3 = dstStride, r4 = src1Stride (widened by movsxdifnidn)
;     r5d = rows still to do
;
;   src2 is walked with a fixed 16-byte row pitch; src1 and dst use their
;   strides.
;
;   Row count: an odd h peels one row first; the loop then consumes two rows
;   per pass and exits only on an exact zero, so h must be at least 2.
%macro PIXELS16_L2 1
%define OP op_%1
cglobal %1_pixels16_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    ; odd h: single leading row
    mova         m0, [r1]
    pavgb        m0, [r2]
    mova         m1, [r1+8]
    pavgb        m1, [r2+8]
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r1, r4
    add          r2, 16
    add          r0, r3
    dec         r5d
.loop:
    ; first row of the pair
    mova         m0, [r1]
    pavgb        m0, [r2]
    mova         m1, [r1+8]
    pavgb        m1, [r2+8]
    add          r1, r4
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    ; second row of the pair
    mova         m0, [r1]
    pavgb        m0, [r2+16]
    mova         m1, [r1+8]
    pavgb        m1, [r2+24]
    add          r1, r4
    add          r2, 32             ; two src2 rows of 16 bytes each
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    sub         r5d, 2              ; last flag-setting op before the branch
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS16_L2 put
PIXELS16_L2 avg
180