/*
 * quarterpel DSP functions
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
23#include <stddef.h>
24#include <stdint.h>
25
26#include "config.h"
27#include "libavutil/attributes.h"
28#include "libavutil/cpu.h"
29#include "libavutil/x86/cpu.h"
30#include "libavcodec/pixels.h"
31#include "libavcodec/qpeldsp.h"
32#include "fpel.h"
33
/*
 * Prototypes of the routines called by the QPEL_OP wrappers in this file.
 * Their definitions are not part of this file.
 * NOTE(review): presumably implemented in x86 assembly, since the code that
 * uses them is guarded by HAVE_X86ASM -- confirm.
 *
 * Strides are declared as int here although the wrappers pass ptrdiff_t
 * values, so those arguments are converted implicitly at each call.
 */

/*
 * Two-source routines: (dst, src1, src2, dstStride, src1Stride, h).
 * There is no stride argument for src2; the wrappers in this file only ever
 * pass a tightly packed on-stack buffer in that position.
 */
void ff_put_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
                                      const uint8_t *src1, const uint8_t *src2,
                                      int dstStride, int src1Stride, int h);

/* Horizontal lowpass routines: (dst, src, dstStride, srcStride, h). */
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride,
                                                 int h);
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride,
                                                int h);

/* Vertical lowpass routines: (dst, src, dstStride, srcStride); unlike the
 * horizontal ones they take no height argument. */
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride);
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride);
/*
 * The mc00 wrappers generated by QPEL_OP paste together the name
 * ff_<OPNAME>pixels{8,16}_<MMX>.  These aliases make the
 * put_no_rnd_ / mmxext spelling of that name resolve to the
 * ff_put_pixels{8,16}_mmx routines.
 * NOTE(review): presumably valid because the mc00 case involves no
 * rounding -- confirm against the definitions of those routines.
 */
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
84
85#if HAVE_X86ASM
86
/*
 * The mc00 wrappers generated by QPEL_OP paste together the name
 * ff_<OPNAME>pixels{8,16}_<MMX>.  These aliases make the put_ / mmxext
 * spelling of that name resolve to the ff_put_pixels{8,16}_mmx routines.
 */
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_pixels8_mmxext  ff_put_pixels8_mmx
89
/*
 * QPEL_OP(OPNAME, RND, MMX) expands to a family of static wrappers named
 * <OPNAME>qpel{8,16}_mcXY_<MMX>(dst, src, stride).
 *   OPNAME - pasted into the name of the routine that produces dst
 *   RND    - pasted after "ff_put" in the names of the intermediate
 *            routines that fill on-stack scratch buffers
 *   MMX    - suffix pasted onto the name of every routine that is called
 * Every line of the macro body has to end in a backslash.
 */
#define QPEL_OP(OPNAME, RND, MMX)                                       \
/* mc00: no lowpass filter; just calls the OPNAME pixels8 routine. */   \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst,                  \
                                         const uint8_t *src,            \
                                         ptrdiff_t stride)              \
{                                                                       \
    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);              \
}                                                                       \
                                                                        \
98static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst,                  \
99                                         const uint8_t *src,            \
100                                         ptrdiff_t stride)              \
101{                                                                       \
102    uint64_t temp[8];                                                   \
103    uint8_t *const half = (uint8_t *) temp;                             \
104    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
105                                                   stride, 8);          \
106    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
107                                        stride, stride, 8);             \
108}                                                                       \
109                                                                        \
110static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst,                  \
111                                         const uint8_t *src,            \
112                                         ptrdiff_t stride)              \
113{                                                                       \
114    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride,    \
115                                                   stride, 8);          \
116}                                                                       \
117                                                                        \
118static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst,                  \
119                                         const uint8_t *src,            \
120                                         ptrdiff_t stride)              \
121{                                                                       \
122    uint64_t temp[8];                                                   \
123    uint8_t *const half = (uint8_t *) temp;                             \
124    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
125                                                   stride, 8);          \
126    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride,     \
127                                        stride, 8);                     \
128}                                                                       \
129                                                                        \
130static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst,                  \
131                                         const uint8_t *src,            \
132                                         ptrdiff_t stride)              \
133{                                                                       \
134    uint64_t temp[8];                                                   \
135    uint8_t *const half = (uint8_t *) temp;                             \
136    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
137                                                   8, stride);          \
138    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
139                                        stride, stride, 8);             \
140}                                                                       \
141                                                                        \
142static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst,                  \
143                                         const uint8_t *src,            \
144                                         ptrdiff_t stride)              \
145{                                                                       \
146    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src,            \
147                                                   stride, stride);     \
148}                                                                       \
149                                                                        \
150static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst,                  \
151                                         const uint8_t *src,            \
152                                         ptrdiff_t stride)              \
153{                                                                       \
154    uint64_t temp[8];                                                   \
155    uint8_t *const half = (uint8_t *) temp;                             \
156    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
157                                                   8, stride);          \
158    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
159                                        stride, 8);                     \
160}                                                                       \
161                                                                        \
162static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst,                  \
163                                         const uint8_t *src,            \
164                                         ptrdiff_t stride)              \
165{                                                                       \
166    uint64_t half[8 + 9];                                               \
167    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
168    uint8_t *const halfHV = (uint8_t *) half;                           \
169    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
170                                                   stride, 9);          \
171    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
172                                        stride, 9);                     \
173    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
174    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
175                                        stride, 8, 8);                  \
176}                                                                       \
177                                                                        \
178static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst,                  \
179                                         const uint8_t *src,            \
180                                         ptrdiff_t stride)              \
181{                                                                       \
182    uint64_t half[8 + 9];                                               \
183    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
184    uint8_t *const halfHV = (uint8_t *) half;                           \
185    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
186                                                   stride, 9);          \
187    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
188                                        stride, 9);                     \
189    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
190    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
191                                        stride, 8, 8);                  \
192}                                                                       \
193                                                                        \
194static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst,                  \
195                                         const uint8_t *src,            \
196                                         ptrdiff_t stride)              \
197{                                                                       \
198    uint64_t half[8 + 9];                                               \
199    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
200    uint8_t *const halfHV = (uint8_t *) half;                           \
201    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
202                                                   stride, 9);          \
203    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
204                                        stride, 9);                     \
205    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
206    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
207                                        stride, 8, 8);                  \
208}                                                                       \
209                                                                        \
210static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst,                  \
211                                         const uint8_t *src,            \
212                                         ptrdiff_t stride)              \
213{                                                                       \
214    uint64_t half[8 + 9];                                               \
215    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
216    uint8_t *const halfHV = (uint8_t *) half;                           \
217    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
218                                                   stride, 9);          \
219    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
220                                        stride, 9);                     \
221    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
222    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
223                                        stride, 8, 8);                  \
224}                                                                       \
225                                                                        \
226static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst,                  \
227                                         const uint8_t *src,            \
228                                         ptrdiff_t stride)              \
229{                                                                       \
230    uint64_t half[8 + 9];                                               \
231    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
232    uint8_t *const halfHV = (uint8_t *) half;                           \
233    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
234                                                   stride, 9);          \
235    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
236    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
237                                        stride, 8, 8);                  \
238}                                                                       \
239                                                                        \
240static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst,                  \
241                                         const uint8_t *src,            \
242                                         ptrdiff_t stride)              \
243{                                                                       \
244    uint64_t half[8 + 9];                                               \
245    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
246    uint8_t *const halfHV = (uint8_t *) half;                           \
247    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
248                                                   stride, 9);          \
249    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
250    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
251                                        stride, 8, 8);                  \
252}                                                                       \
253                                                                        \
254static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst,                  \
255                                         const uint8_t *src,            \
256                                         ptrdiff_t stride)              \
257{                                                                       \
258    uint64_t half[8 + 9];                                               \
259    uint8_t *const halfH = (uint8_t *) half;                            \
260    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
261                                                   stride, 9);          \
262    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH,              \
263                                        8, stride, 9);                  \
264    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
265                                                   stride, 8);          \
266}                                                                       \
267                                                                        \
268static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst,                  \
269                                         const uint8_t *src,            \
270                                         ptrdiff_t stride)              \
271{                                                                       \
272    uint64_t half[8 + 9];                                               \
273    uint8_t *const halfH = (uint8_t *) half;                            \
274    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
275                                                   stride, 9);          \
276    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
277                                        stride, 9);                     \
278    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
279                                                   stride, 8);          \
280}                                                                       \
281                                                                        \
282static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst,                  \
283                                         const uint8_t *src,            \
284                                         ptrdiff_t stride)              \
285{                                                                       \
286    uint64_t half[9];                                                   \
287    uint8_t *const halfH = (uint8_t *) half;                            \
288    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
289                                                   stride, 9);          \
290    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
291                                                   stride, 8);          \
292}                                                                       \
293                                                                        \
294static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst,                 \
295                                          const uint8_t *src,           \
296                                          ptrdiff_t stride)             \
297{                                                                       \
298    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);            \
299}                                                                       \
300                                                                        \
301static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst,                 \
302                                          const uint8_t *src,           \
303                                          ptrdiff_t stride)             \
304{                                                                       \
305    uint64_t temp[32];                                                  \
306    uint8_t *const half = (uint8_t *) temp;                             \
307    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
308                                                    stride, 16);        \
309    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
310                                         stride, 16);                   \
311}                                                                       \
312                                                                        \
313static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst,                 \
314                                          const uint8_t *src,           \
315                                          ptrdiff_t stride)             \
316{                                                                       \
317    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src,           \
318                                                    stride, stride, 16);\
319}                                                                       \
320                                                                        \
321static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst,                 \
322                                          const uint8_t *src,           \
323                                          ptrdiff_t stride)             \
324{                                                                       \
325    uint64_t temp[32];                                                  \
326    uint8_t *const half = (uint8_t*) temp;                              \
327    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
328                                                    stride, 16);        \
329    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half,            \
330                                         stride, stride, 16);           \
331}                                                                       \
332                                                                        \
333static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst,                 \
334                                          const uint8_t *src,           \
335                                          ptrdiff_t stride)             \
336{                                                                       \
337    uint64_t temp[32];                                                  \
338    uint8_t *const half = (uint8_t *) temp;                             \
339    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
340                                                    stride);            \
341    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
342                                         stride, 16);                   \
343}                                                                       \
344                                                                        \
345static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst,                 \
346                                          const uint8_t *src,           \
347                                          ptrdiff_t stride)             \
348{                                                                       \
349    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src,           \
350                                                    stride, stride);    \
351}                                                                       \
352                                                                        \
353static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst,                 \
354                                          const uint8_t *src,           \
355                                          ptrdiff_t stride)             \
356{                                                                       \
357    uint64_t temp[32];                                                  \
358    uint8_t *const half = (uint8_t *) temp;                             \
359    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
360                                                    stride);            \
361    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half,         \
362                                         stride, stride, 16);           \
363}                                                                       \
364                                                                        \
365static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst,                 \
366                                          const uint8_t *src,           \
367                                          ptrdiff_t stride)             \
368{                                                                       \
369    uint64_t half[16 * 2 + 17 * 2];                                     \
370    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
371    uint8_t *const halfHV = (uint8_t *) half;                           \
372    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
373                                                    stride, 17);        \
374    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
375                                         stride, 17);                   \
376    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
377                                                    16, 16);            \
378    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
379                                         stride, 16, 16);               \
380}                                                                       \
381                                                                        \
382static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst,                 \
383                                          const uint8_t *src,           \
384                                          ptrdiff_t stride)             \
385{                                                                       \
386    uint64_t half[16 * 2 + 17 * 2];                                     \
387    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
388    uint8_t *const halfHV = (uint8_t *) half;                           \
389    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
390                                                    stride, 17);        \
391    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
392                                         stride, 17);                   \
393    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
394                                                    16, 16);            \
395    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
396                                         stride, 16, 16);               \
397}                                                                       \
398                                                                        \
399static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst,                 \
400                                          const uint8_t *src,           \
401                                          ptrdiff_t stride)             \
402{                                                                       \
403    uint64_t half[16 * 2 + 17 * 2];                                     \
404    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
405    uint8_t *const halfHV = (uint8_t *) half;                           \
406    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
407                                                    stride, 17);        \
408    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
409                                         stride, 17);                   \
410    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
411                                                    16, 16);            \
412    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
413                                         stride, 16, 16);               \
414}                                                                       \
415                                                                        \
416static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst,                 \
417                                          const uint8_t *src,           \
418                                          ptrdiff_t stride)             \
419{                                                                       \
420    uint64_t half[16 * 2 + 17 * 2];                                     \
421    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
422    uint8_t *const halfHV = (uint8_t *) half;                           \
423    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
424                                                    stride, 17);        \
425    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
426                                         stride, 17);                   \
427    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
428                                                    16, 16);            \
429    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
430                                         stride, 16, 16);               \
431}                                                                       \
432                                                                        \
433static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst,                 \
434                                          const uint8_t *src,           \
435                                          ptrdiff_t stride)             \
436{                                                                       \
437    uint64_t half[16 * 2 + 17 * 2];                                     \
438    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
439    uint8_t *const halfHV = (uint8_t *) half;                           \
440    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
441                                                    stride, 17);        \
442    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
443                                                    16, 16);            \
444    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
445                                         stride, 16, 16);               \
446}                                                                       \
447                                                                        \
448static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst,                 \
449                                          const uint8_t *src,           \
450                                          ptrdiff_t stride)             \
451{                                                                       \
452    uint64_t half[16 * 2 + 17 * 2];                                     \
453    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
454    uint8_t *const halfHV = (uint8_t *) half;                           \
455    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
456                                                    stride, 17);        \
457    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
458                                                    16, 16);            \
459    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
460                                         stride, 16, 16);               \
461}                                                                       \
462                                                                        \
463static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst,                 \
464                                          const uint8_t *src,           \
465                                          ptrdiff_t stride)             \
466{                                                                       \
467    uint64_t half[17 * 2];                                              \
468    uint8_t *const halfH = (uint8_t *) half;                            \
469    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
470                                                    stride, 17);        \
471    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
472                                         stride, 17);                   \
473    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
474                                                    stride, 16);        \
475}                                                                       \
476                                                                        \
477static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst,                 \
478                                          const uint8_t *src,           \
479                                          ptrdiff_t stride)             \
480{                                                                       \
481    uint64_t half[17 * 2];                                              \
482    uint8_t *const halfH = (uint8_t *) half;                            \
483    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
484                                                    stride, 17);        \
485    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
486                                         stride, 17);                   \
487    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
488                                                    stride, 16);        \
489}                                                                       \
490                                                                        \
491static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst,                 \
492                                          const uint8_t *src,           \
493                                          ptrdiff_t stride)             \
494{                                                                       \
495    uint64_t half[17 * 2];                                              \
496    uint8_t *const halfH = (uint8_t *) half;                            \
497    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
498                                                    stride, 17);        \
499    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
500                                                    stride, 16);        \
501}
502
503QPEL_OP(put_,        _,        mmxext)
504QPEL_OP(avg_,        _,        mmxext)
505QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
506
507#endif /* HAVE_X86ASM */
508
/*
 * Fill the 16 entries of c->PFX_pixels_tab[IDX] with the identifiers formed
 * by pasting PREFIX, PFX, SIZE, "_mcXY_" and CPU together.
 *
 * Entry 4 * Y + X receives the "_mcXY_" function. X and Y are presumably
 * the horizontal and vertical quarter-pel offsets -- confirm against the
 * users of the table.
 *
 * A pointer named c must be in scope where the macro is expanded. PREFIX
 * may be left empty. The do { } while (0) wrapper makes the expansion a
 * single statement.
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                                \
do {                                                                               \
    /* Y = 0 */                                                                    \
    c->PFX ## _pixels_tab[IDX][4 * 0 + 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 0 + 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 0 + 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 0 + 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
    /* Y = 1 */                                                                    \
    c->PFX ## _pixels_tab[IDX][4 * 1 + 0] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 1 + 1] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 1 + 2] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 1 + 3] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
    /* Y = 2 */                                                                    \
    c->PFX ## _pixels_tab[IDX][4 * 2 + 0] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 2 + 1] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 2 + 2] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 2 + 3] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
    /* Y = 3 */                                                                    \
    c->PFX ## _pixels_tab[IDX][4 * 3 + 0] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 3 + 1] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 3 + 2] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][4 * 3 + 3] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
} while (0)
528
529av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
530{
531    int cpu_flags = av_get_cpu_flags();
532
533    if (X86_MMXEXT(cpu_flags)) {
534#if HAVE_MMXEXT_EXTERNAL
535        SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
536        SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
537
538        SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
539        SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
540        SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
541        SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
542#endif /* HAVE_MMXEXT_EXTERNAL */
543    }
544}
545