1/* 2 * quarterpel DSP functions 3 * Copyright (c) 2000, 2001 Fabrice Bellard 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#include <stddef.h> 24#include <stdint.h> 25 26#include "config.h" 27#include "libavutil/attributes.h" 28#include "libavutil/cpu.h" 29#include "libavutil/x86/cpu.h" 30#include "libavcodec/pixels.h" 31#include "libavcodec/qpeldsp.h" 32#include "fpel.h" 33 34void ff_put_pixels8_l2_mmxext(uint8_t *dst, 35 const uint8_t *src1, const uint8_t *src2, 36 int dstStride, int src1Stride, int h); 37void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, 38 const uint8_t *src1, const uint8_t *src2, 39 int dstStride, int src1Stride, int h); 40void ff_avg_pixels8_l2_mmxext(uint8_t *dst, 41 const uint8_t *src1, const uint8_t *src2, 42 int dstStride, int src1Stride, int h); 43void ff_put_pixels16_l2_mmxext(uint8_t *dst, 44 const uint8_t *src1, const uint8_t *src2, 45 int dstStride, int src1Stride, int h); 46void ff_avg_pixels16_l2_mmxext(uint8_t *dst, 47 const uint8_t *src1, const uint8_t *src2, 48 int dstStride, int src1Stride, int h); 49void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, 50 const uint8_t *src1, const uint8_t *src2, 51 int dstStride, int src1Stride, int h); 52void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 53 int dstStride, int srcStride, int h); 54void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 55 int dstStride, int srcStride, int h); 56void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, 57 const uint8_t *src, 58 int dstStride, int srcStride, 59 int h); 60void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 61 int dstStride, int srcStride, int h); 62void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 63 int dstStride, int srcStride, int h); 64void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, 65 const uint8_t *src, 66 int dstStride, int srcStride, 67 int h); 68void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 69 int dstStride, int srcStride); 70void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 71 int dstStride, int srcStride); 72void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, 73 const uint8_t *src, 74 int dstStride, int srcStride); 75void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 76 int dstStride, int srcStride); 77void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 78 int dstStride, int srcStride); 79void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, 80 const uint8_t *src, 81 int dstStride, int srcStride); 82#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx 83#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx 84 85#if HAVE_X86ASM 86 87#define ff_put_pixels16_mmxext ff_put_pixels16_mmx 88#define ff_put_pixels8_mmxext ff_put_pixels8_mmx 89 90#define QPEL_OP(OPNAME, RND, MMX) \ 91static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \ 92 const uint8_t *src, \ 93 ptrdiff_t stride) \ 94{ \ 95 ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \ 96} \ 97 \ 98static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \ 99 const uint8_t *src, \ 100 ptrdiff_t stride) \ 101{ \ 102 uint64_t temp[8]; \ 103 uint8_t *const half = (uint8_t *) temp; \ 104 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ 105 stride, 8); \ 106 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ 107 stride, stride, 8); \ 108} \ 109 \ 110static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \ 111 const uint8_t *src, \ 112 ptrdiff_t stride) \ 113{ \ 114 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \ 115 stride, 8); \ 116} \ 117 \ 118static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \ 119 const uint8_t *src, \ 120 ptrdiff_t stride) \ 121{ \ 122 uint64_t temp[8]; \ 123 uint8_t *const half = (uint8_t *) temp; \ 124 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ 125 stride, 8); \ 126 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \ 127 stride, 8); \ 128} \ 129 \ 130static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \ 131 const uint8_t *src, \ 132 ptrdiff_t stride) \ 133{ \ 134 uint64_t temp[8]; \ 135 uint8_t *const half = (uint8_t *) temp; \ 136 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 137 8, stride); \ 138 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ 139 stride, stride, 8); \ 140} \ 141 \ 142static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \ 143 const uint8_t *src, \ 144 ptrdiff_t stride) \ 145{ \ 146 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \ 147 stride, stride); \ 148} \ 149 \ 150static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \ 151 const uint8_t *src, \ 152 ptrdiff_t stride) \ 153{ \ 154 uint64_t temp[8]; \ 155 uint8_t *const half = (uint8_t *) temp; \ 156 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 157 8, stride); \ 158 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\ 159 stride, 8); \ 160} \ 161 \ 162static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \ 163 const uint8_t *src, \ 164 ptrdiff_t stride) \ 165{ \ 166 uint64_t half[8 + 9]; \ 167 uint8_t *const halfH = (uint8_t *) half + 64; \ 168 uint8_t *const halfHV = (uint8_t *) half; \ 169 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 170 stride, 9); \ 171 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ 172 stride, 9); \ 173 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 174 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 175 stride, 8, 8); \ 176} \ 177 \ 178static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \ 179 const uint8_t *src, \ 180 ptrdiff_t stride) \ 181{ \ 182 uint64_t half[8 + 9]; \ 183 uint8_t *const halfH = (uint8_t *) half + 64; \ 184 uint8_t *const halfHV = (uint8_t *) half; \ 185 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 186 stride, 9); \ 187 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 188 stride, 9); \ 189 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 190 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 191 stride, 8, 8); \ 192} \ 193 \ 194static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \ 195 const uint8_t *src, \ 196 ptrdiff_t stride) \ 197{ \ 198 uint64_t half[8 + 9]; \ 199 uint8_t *const halfH = (uint8_t *) half + 64; \ 200 uint8_t *const halfHV = (uint8_t *) half; \ 201 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 202 stride, 9); \ 203 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ 204 stride, 9); \ 205 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 206 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 207 stride, 8, 8); \ 208} \ 209 \ 210static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \ 211 const uint8_t *src, \ 212 ptrdiff_t stride) \ 213{ \ 214 uint64_t half[8 + 9]; \ 215 uint8_t *const halfH = (uint8_t *) half + 64; \ 216 uint8_t *const halfHV = (uint8_t *) half; \ 217 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 218 stride, 9); \ 219 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 220 stride, 9); \ 221 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 222 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 223 stride, 8, 8); \ 224} \ 225 \ 226static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \ 227 const uint8_t *src, \ 228 ptrdiff_t stride) \ 229{ \ 230 uint64_t half[8 + 9]; \ 231 uint8_t *const halfH = (uint8_t *) half + 64; \ 232 uint8_t *const halfHV = (uint8_t *) half; \ 233 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 234 stride, 9); \ 235 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 236 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 237 stride, 8, 8); \ 238} \ 239 \ 240static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \ 241 const uint8_t *src, \ 242 ptrdiff_t stride) \ 243{ \ 244 uint64_t half[8 + 9]; \ 245 uint8_t *const halfH = (uint8_t *) half + 64; \ 246 uint8_t *const halfHV = (uint8_t *) half; \ 247 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 248 stride, 9); \ 249 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 250 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 251 stride, 8, 8); \ 252} \ 253 \ 254static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \ 255 const uint8_t *src, \ 256 ptrdiff_t stride) \ 257{ \ 258 uint64_t half[8 + 9]; \ 259 uint8_t *const halfH = (uint8_t *) half; \ 260 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 261 stride, 9); \ 262 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \ 263 8, stride, 9); \ 264 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 265 stride, 8); \ 266} \ 267 \ 268static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \ 269 const uint8_t *src, \ 270 ptrdiff_t stride) \ 271{ \ 272 uint64_t half[8 + 9]; \ 273 uint8_t *const halfH = (uint8_t *) half; \ 274 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 275 stride, 9); \ 276 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 277 stride, 9); \ 278 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 279 stride, 8); \ 280} \ 281 \ 282static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \ 283 const uint8_t *src, \ 284 ptrdiff_t stride) \ 285{ \ 286 uint64_t half[9]; \ 287 uint8_t *const halfH = (uint8_t *) half; \ 288 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 289 stride, 9); \ 290 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 291 stride, 8); \ 292} \ 293 \ 294static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \ 295 const uint8_t *src, \ 296 ptrdiff_t stride) \ 297{ \ 298 ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \ 299} \ 300 \ 301static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \ 302 const uint8_t *src, \ 303 ptrdiff_t stride) \ 304{ \ 305 uint64_t temp[32]; \ 306 uint8_t *const half = (uint8_t *) temp; \ 307 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ 308 stride, 16); \ 309 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ 310 stride, 16); \ 311} \ 312 \ 313static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \ 314 const uint8_t *src, \ 315 ptrdiff_t stride) \ 316{ \ 317 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \ 318 stride, stride, 16);\ 319} \ 320 \ 321static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \ 322 const uint8_t *src, \ 323 ptrdiff_t stride) \ 324{ \ 325 uint64_t temp[32]; \ 326 uint8_t *const half = (uint8_t*) temp; \ 327 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ 328 stride, 16); \ 329 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \ 330 stride, stride, 16); \ 331} \ 332 \ 333static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \ 334 const uint8_t *src, \ 335 ptrdiff_t stride) \ 336{ \ 337 uint64_t temp[32]; \ 338 uint8_t *const half = (uint8_t *) temp; \ 339 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ 340 stride); \ 341 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ 342 stride, 16); \ 343} \ 344 \ 345static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \ 346 const uint8_t *src, \ 347 ptrdiff_t stride) \ 348{ \ 349 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \ 350 stride, stride); \ 351} \ 352 \ 353static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \ 354 const uint8_t *src, \ 355 ptrdiff_t stride) \ 356{ \ 357 uint64_t temp[32]; \ 358 uint8_t *const half = (uint8_t *) temp; \ 359 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ 360 stride); \ 361 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \ 362 stride, stride, 16); \ 363} \ 364 \ 365static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \ 366 const uint8_t *src, \ 367 ptrdiff_t stride) \ 368{ \ 369 uint64_t half[16 * 2 + 17 * 2]; \ 370 uint8_t *const halfH = (uint8_t *) half + 256; \ 371 uint8_t *const halfHV = (uint8_t *) half; \ 372 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 373 stride, 17); \ 374 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 375 stride, 17); \ 376 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 377 16, 16); \ 378 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 379 stride, 16, 16); \ 380} \ 381 \ 382static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \ 383 const uint8_t *src, \ 384 ptrdiff_t stride) \ 385{ \ 386 uint64_t half[16 * 2 + 17 * 2]; \ 387 uint8_t *const halfH = (uint8_t *) half + 256; \ 388 uint8_t *const halfHV = (uint8_t *) half; \ 389 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 390 stride, 17); \ 391 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 392 stride, 17); \ 393 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 394 16, 16); \ 395 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 396 stride, 16, 16); \ 397} \ 398 \ 399static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \ 400 const uint8_t *src, \ 401 ptrdiff_t stride) \ 402{ \ 403 uint64_t half[16 * 2 + 17 * 2]; \ 404 uint8_t *const halfH = (uint8_t *) half + 256; \ 405 uint8_t *const halfHV = (uint8_t *) half; \ 406 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 407 stride, 17); \ 408 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 409 stride, 17); \ 410 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 411 16, 16); \ 412 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 413 stride, 16, 16); \ 414} \ 415 \ 416static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \ 417 const uint8_t *src, \ 418 ptrdiff_t stride) \ 419{ \ 420 uint64_t half[16 * 2 + 17 * 2]; \ 421 uint8_t *const halfH = (uint8_t *) half + 256; \ 422 uint8_t *const halfHV = (uint8_t *) half; \ 423 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 424 stride, 17); \ 425 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 426 stride, 17); \ 427 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 428 16, 16); \ 429 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 430 stride, 16, 16); \ 431} \ 432 \ 433static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \ 434 const uint8_t *src, \ 435 ptrdiff_t stride) \ 436{ \ 437 uint64_t half[16 * 2 + 17 * 2]; \ 438 uint8_t *const halfH = (uint8_t *) half + 256; \ 439 uint8_t *const halfHV = (uint8_t *) half; \ 440 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 441 stride, 17); \ 442 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 443 16, 16); \ 444 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 445 stride, 16, 16); \ 446} \ 447 \ 448static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \ 449 const uint8_t *src, \ 450 ptrdiff_t stride) \ 451{ \ 452 uint64_t half[16 * 2 + 17 * 2]; \ 453 uint8_t *const halfH = (uint8_t *) half + 256; \ 454 uint8_t *const halfHV = (uint8_t *) half; \ 455 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 456 stride, 17); \ 457 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 458 16, 16); \ 459 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 460 stride, 16, 16); \ 461} \ 462 \ 463static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \ 464 const uint8_t *src, \ 465 ptrdiff_t stride) \ 466{ \ 467 uint64_t half[17 * 2]; \ 468 uint8_t *const halfH = (uint8_t *) half; \ 469 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 470 stride, 17); \ 471 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 472 stride, 17); \ 473 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 474 stride, 16); \ 475} \ 476 \ 477static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \ 478 const uint8_t *src, \ 479 ptrdiff_t stride) \ 480{ \ 481 uint64_t half[17 * 2]; \ 482 uint8_t *const halfH = (uint8_t *) half; \ 483 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 484 stride, 17); \ 485 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 486 stride, 17); \ 487 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 488 stride, 16); \ 489} \ 490 \ 491static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \ 492 const uint8_t *src, \ 493 ptrdiff_t stride) \ 494{ \ 495 uint64_t half[17 * 2]; \ 496 uint8_t *const halfH = (uint8_t *) half; \ 497 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 498 stride, 17); \ 499 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 500 stride, 16); \ 501} 502 503QPEL_OP(put_, _, mmxext) 504QPEL_OP(avg_, _, mmxext) 505QPEL_OP(put_no_rnd_, _no_rnd_, mmxext) 506 507#endif /* HAVE_X86ASM */ 508 509#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ 510do { \ 511 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ 512 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ 513 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ 514 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ 515 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ 516 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ 517 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ 518 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ 519 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ 520 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ 521 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ 522 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ 523 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ 524 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ 525 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ 526 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ 527} while (0) 528 529av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c) 530{ 531 int cpu_flags = av_get_cpu_flags(); 532 533 if (X86_MMXEXT(cpu_flags)) { 534#if HAVE_MMXEXT_EXTERNAL 535 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); 536 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); 537 538 SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ); 539 SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ); 540 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); 541 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); 542#endif /* HAVE_MMXEXT_EXTERNAL */ 543 } 544} 545