1cabdff1aSopenharmony_ci;*****************************************************************************
2cabdff1aSopenharmony_ci;* SIMD-optimized pixel operations
3cabdff1aSopenharmony_ci;*****************************************************************************
4cabdff1aSopenharmony_ci;* Copyright (c) 2000, 2001 Fabrice Bellard
5cabdff1aSopenharmony_ci;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6cabdff1aSopenharmony_ci;*
7cabdff1aSopenharmony_ci;* This file is part of FFmpeg.
8cabdff1aSopenharmony_ci;*
9cabdff1aSopenharmony_ci;* FFmpeg is free software; you can redistribute it and/or
10cabdff1aSopenharmony_ci;* modify it under the terms of the GNU Lesser General Public
11cabdff1aSopenharmony_ci;* License as published by the Free Software Foundation; either
12cabdff1aSopenharmony_ci;* version 2.1 of the License, or (at your option) any later version.
13cabdff1aSopenharmony_ci;*
14cabdff1aSopenharmony_ci;* FFmpeg is distributed in the hope that it will be useful,
15cabdff1aSopenharmony_ci;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16cabdff1aSopenharmony_ci;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17cabdff1aSopenharmony_ci;* Lesser General Public License for more details.
18cabdff1aSopenharmony_ci;*
19cabdff1aSopenharmony_ci;* You should have received a copy of the GNU Lesser General Public
20cabdff1aSopenharmony_ci;* License along with FFmpeg; if not, write to the Free Software
21cabdff1aSopenharmony_ci;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22cabdff1aSopenharmony_ci;*****************************************************************************
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci%include "libavutil/x86/x86util.asm"
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ciSECTION .text
27cabdff1aSopenharmony_ci
; get_pixels (SSE2): widen an 8x8 block of unsigned bytes into 16-bit words.
;
; Presumed C prototype (not stated next to this function -- confirm against
; the C-side declaration):
;   void ff_get_pixels(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
;
; Register roles:
;   r0 = block   destination; 8 rows of 8 words (128 bytes) are written with
;                mova, the aligned store, so block must be mmsize-aligned
;   r1 = pixels  source; the low 8 bytes of each of 8 rows are read
;   r2 = stride  byte distance between source rows, used at full register width
;   r3 = 3 * stride (scratch)
;   m4 = all zero; supplies the high byte of every widened word
INIT_XMM sse2
cglobal get_pixels, 3, 4, 5
    pxor         m4, m4
    lea          r3, [r2*3]

    ; Rows 0-3. All four loads are issued before the first store of the group.
    movh         m0, [r1]
    punpcklbw    m0, m4                 ; bytes -> zero-extended words
    movh         m1, [r1+r2]
    punpcklbw    m1, m4
    movh         m2, [r1+r2*2]
    punpcklbw    m2, m4
    movh         m3, [r1+r3]
    punpcklbw    m3, m4
    lea          r1, [r1+r2*4]          ; source pointer now addresses row 4
    mova           [r0], m0
    mova  [r0+mmsize*1], m1
    mova  [r0+mmsize*2], m2
    mova  [r0+mmsize*3], m3

    ; Rows 4-7, same pattern; r1 is not advanced again.
    movh         m0, [r1]
    punpcklbw    m0, m4
    movh         m1, [r1+r2]
    punpcklbw    m1, m4
    movh         m2, [r1+r2*2]
    punpcklbw    m2, m4
    movh         m3, [r1+r3]
    punpcklbw    m3, m4
    mova  [r0+mmsize*4], m0
    mova  [r0+mmsize*5], m1
    mova  [r0+mmsize*6], m2
    mova  [r0+mmsize*7], m3
    RET
58cabdff1aSopenharmony_ci
; void ff_diff_pixels(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                     ptrdiff_t stride);
;
; Writes the element-wise difference s1 - s2 of two 8x8 byte blocks to block
; as 16-bit words. Both inputs are zero-extended to words before the
; subtraction, so every difference fits in a word.
;
; Register roles:
;   r0 = block + 128, i.e. one past the end of the 128-byte output; stores use
;        mova, the aligned store, so block must be mmsize-aligned
;   r1 = s1, r2 = s2; both advance by two rows per iteration
;   r3 = stride, byte distance between rows of s1 and of s2
;   r4 = negative byte offset into the output, counting from -128 up to 0
;   m4 = all zero; supplies the high byte of every widened word
INIT_XMM sse2
cglobal diff_pixels, 4,5,5
    pxor         m4, m4
    mov          r4, -128
    add          r0,  128
.row_pair:
    ; Load two rows from each source before anything is stored.
    movq         m0, [r1]
    movq         m1, [r1+r3]
    movq         m2, [r2]
    movq         m3, [r2+r3]
    ; lea leaves the flags untouched, so the pointers can be advanced here.
    lea          r1, [r1+r3*2]
    lea          r2, [r2+r3*2]
    ; first row of the pair: widen both inputs, then subtract
    punpcklbw    m0, m4
    punpcklbw    m2, m4
    psubw        m0, m2
    ; second row of the pair
    punpcklbw    m1, m4
    punpcklbw    m3, m4
    psubw        m1, m3
    mova  [r0+r4], m0
    mova  [r0+r4+mmsize], m1
    ; The add must stay directly before the branch: its zero flag ends the
    ; loop once the offset reaches 0 (four iterations of 2*mmsize bytes).
    add          r4, 2 * mmsize
    jnz .row_pair
    RET
84