/*
 * SIMD-optimized halfpel functions are compiled twice for rnd/no_rnd
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
 * and improved by Zdenek Kabelac <kabi@users.sf.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26
#include <stddef.h>
#include <stdint.h>
29
// put_pixels
31av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
32{
33    MOVQ_BFE(mm6);
34    __asm__ volatile(
35        "lea    (%3, %3), %%"FF_REG_a"  \n\t"
36        ".p2align 3                     \n\t"
37        "1:                             \n\t"
38        "movq   (%1), %%mm0             \n\t"
39        "movq   1(%1), %%mm1            \n\t"
40        "movq   (%1, %3), %%mm2         \n\t"
41        "movq   1(%1, %3), %%mm3        \n\t"
42        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
43        "movq   %%mm4, (%2)             \n\t"
44        "movq   %%mm5, (%2, %3)         \n\t"
45        "add    %%"FF_REG_a", %1        \n\t"
46        "add    %%"FF_REG_a", %2        \n\t"
47        "movq   (%1), %%mm0             \n\t"
48        "movq   1(%1), %%mm1            \n\t"
49        "movq   (%1, %3), %%mm2         \n\t"
50        "movq   1(%1, %3), %%mm3        \n\t"
51        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
52        "movq   %%mm4, (%2)             \n\t"
53        "movq   %%mm5, (%2, %3)         \n\t"
54        "add    %%"FF_REG_a", %1        \n\t"
55        "add    %%"FF_REG_a", %2        \n\t"
56        "subl   $4, %0                  \n\t"
57        "jnz    1b                      \n\t"
58        :"+g"(h), "+S"(pixels), "+D"(block)
59        :"r"((x86_reg)line_size)
60        :FF_REG_a, "memory");
61}
62
63av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
64{
65    MOVQ_BFE(mm6);
66    __asm__ volatile(
67        "lea    (%3, %3), %%"FF_REG_a"  \n\t"
68        ".p2align 3                     \n\t"
69        "1:                             \n\t"
70        "movq   (%1), %%mm0             \n\t"
71        "movq   1(%1), %%mm1            \n\t"
72        "movq   (%1, %3), %%mm2         \n\t"
73        "movq   1(%1, %3), %%mm3        \n\t"
74        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
75        "movq   %%mm4, (%2)             \n\t"
76        "movq   %%mm5, (%2, %3)         \n\t"
77        "movq   8(%1), %%mm0            \n\t"
78        "movq   9(%1), %%mm1            \n\t"
79        "movq   8(%1, %3), %%mm2        \n\t"
80        "movq   9(%1, %3), %%mm3        \n\t"
81        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
82        "movq   %%mm4, 8(%2)            \n\t"
83        "movq   %%mm5, 8(%2, %3)        \n\t"
84        "add    %%"FF_REG_a", %1        \n\t"
85        "add    %%"FF_REG_a", %2        \n\t"
86        "movq   (%1), %%mm0             \n\t"
87        "movq   1(%1), %%mm1            \n\t"
88        "movq   (%1, %3), %%mm2         \n\t"
89        "movq   1(%1, %3), %%mm3        \n\t"
90        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
91        "movq   %%mm4, (%2)             \n\t"
92        "movq   %%mm5, (%2, %3)         \n\t"
93        "movq   8(%1), %%mm0            \n\t"
94        "movq   9(%1), %%mm1            \n\t"
95        "movq   8(%1, %3), %%mm2        \n\t"
96        "movq   9(%1, %3), %%mm3        \n\t"
97        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
98        "movq   %%mm4, 8(%2)            \n\t"
99        "movq   %%mm5, 8(%2, %3)        \n\t"
100        "add    %%"FF_REG_a", %1        \n\t"
101        "add    %%"FF_REG_a", %2        \n\t"
102        "subl   $4, %0                  \n\t"
103        "jnz    1b                      \n\t"
104        :"+g"(h), "+S"(pixels), "+D"(block)
105        :"r"((x86_reg)line_size)
106        :FF_REG_a, "memory");
107}
108
109av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
110{
111    MOVQ_BFE(mm6);
112    __asm__ volatile(
113        "lea (%3, %3), %%"FF_REG_a"     \n\t"
114        "movq (%1), %%mm0               \n\t"
115        ".p2align 3                     \n\t"
116        "1:                             \n\t"
117        "movq   (%1, %3), %%mm1         \n\t"
118        "movq   (%1, %%"FF_REG_a"),%%mm2\n\t"
119        PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
120        "movq   %%mm4, (%2)             \n\t"
121        "movq   %%mm5, (%2, %3)         \n\t"
122        "add    %%"FF_REG_a", %1        \n\t"
123        "add    %%"FF_REG_a", %2        \n\t"
124        "movq   (%1, %3), %%mm1         \n\t"
125        "movq   (%1, %%"FF_REG_a"),%%mm0\n\t"
126        PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
127        "movq   %%mm4, (%2)             \n\t"
128        "movq   %%mm5, (%2, %3)         \n\t"
129        "add    %%"FF_REG_a", %1        \n\t"
130        "add    %%"FF_REG_a", %2        \n\t"
131        "subl   $4, %0                  \n\t"
132        "jnz    1b                      \n\t"
133        :"+g"(h), "+S"(pixels), "+D"(block)
134        :"r"((x86_reg)line_size)
135        :FF_REG_a, "memory");
136}
137
138av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
139{
140    MOVQ_BFE(mm6);
141        __asm__ volatile(
142            ".p2align 3                 \n\t"
143            "1:                         \n\t"
144            "movq  (%1), %%mm0          \n\t"
145            "movq  1(%1), %%mm1         \n\t"
146            "movq  (%2), %%mm3          \n\t"
147            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
148            PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
149            "movq  %%mm0, (%2)          \n\t"
150            "movq  8(%1), %%mm0         \n\t"
151            "movq  9(%1), %%mm1         \n\t"
152            "movq  8(%2), %%mm3         \n\t"
153            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
154            PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
155            "movq  %%mm0, 8(%2)         \n\t"
156            "add    %3, %1              \n\t"
157            "add    %3, %2              \n\t"
158            "subl   $1, %0              \n\t"
159            "jnz    1b                  \n\t"
160            :"+g"(h), "+S"(pixels), "+D"(block)
161            :"r"((x86_reg)line_size)
162            :"memory");
163}
164
165av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
166{
167    MOVQ_BFE(mm6);
168    __asm__ volatile(
169        "lea    (%3, %3), %%"FF_REG_a"  \n\t"
170        "movq   (%1), %%mm0             \n\t"
171        ".p2align 3                     \n\t"
172        "1:                             \n\t"
173        "movq   (%1, %3), %%mm1         \n\t"
174        "movq   (%1, %%"FF_REG_a"), %%mm2 \n\t"
175        PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
176        "movq   (%2), %%mm3             \n\t"
177        PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6)
178        "movq   (%2, %3), %%mm3         \n\t"
179        PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
180        "movq   %%mm0, (%2)             \n\t"
181        "movq   %%mm1, (%2, %3)         \n\t"
182        "add    %%"FF_REG_a", %1        \n\t"
183        "add    %%"FF_REG_a", %2        \n\t"
184
185        "movq   (%1, %3), %%mm1         \n\t"
186        "movq   (%1, %%"FF_REG_a"), %%mm0 \n\t"
187        PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
188        "movq   (%2), %%mm3             \n\t"
189        PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6)
190        "movq   (%2, %3), %%mm3         \n\t"
191        PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
192        "movq   %%mm2, (%2)             \n\t"
193        "movq   %%mm1, (%2, %3)         \n\t"
194        "add    %%"FF_REG_a", %1        \n\t"
195        "add    %%"FF_REG_a", %2        \n\t"
196
197        "subl   $4, %0                  \n\t"
198        "jnz    1b                      \n\t"
199        :"+g"(h), "+S"(pixels), "+D"(block)
200        :"r"((x86_reg)line_size)
201        :FF_REG_a, "memory");
202}
203