1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * quarterpel DSP functions 3cabdff1aSopenharmony_ci * Copyright (c) 2000, 2001 Fabrice Bellard 4cabdff1aSopenharmony_ci * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16cabdff1aSopenharmony_ci * Lesser General Public License for more details. 17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include <stddef.h> 24cabdff1aSopenharmony_ci#include <stdint.h> 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#include "config.h" 27cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 28cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 29cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 30cabdff1aSopenharmony_ci#include "libavcodec/pixels.h" 31cabdff1aSopenharmony_ci#include "libavcodec/qpeldsp.h" 32cabdff1aSopenharmony_ci#include "fpel.h" 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_civoid ff_put_pixels8_l2_mmxext(uint8_t *dst, 35cabdff1aSopenharmony_ci const uint8_t *src1, const uint8_t *src2, 36cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 37cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, 38cabdff1aSopenharmony_ci const uint8_t *src1, const uint8_t *src2, 39cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 40cabdff1aSopenharmony_civoid ff_avg_pixels8_l2_mmxext(uint8_t *dst, 41cabdff1aSopenharmony_ci const uint8_t *src1, const uint8_t *src2, 42cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 43cabdff1aSopenharmony_civoid ff_put_pixels16_l2_mmxext(uint8_t *dst, 44cabdff1aSopenharmony_ci const uint8_t *src1, const uint8_t *src2, 45cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 46cabdff1aSopenharmony_civoid ff_avg_pixels16_l2_mmxext(uint8_t *dst, 47cabdff1aSopenharmony_ci const uint8_t *src1, const uint8_t *src2, 48cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 49cabdff1aSopenharmony_civoid ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, 50cabdff1aSopenharmony_ci const uint8_t *src1, const uint8_t *src2, 51cabdff1aSopenharmony_ci int dstStride, int src1Stride, int h); 52cabdff1aSopenharmony_civoid ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 53cabdff1aSopenharmony_ci int dstStride, int srcStride, int h); 54cabdff1aSopenharmony_civoid ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 55cabdff1aSopenharmony_ci int dstStride, int srcStride, int h); 56cabdff1aSopenharmony_civoid ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, 57cabdff1aSopenharmony_ci const uint8_t *src, 58cabdff1aSopenharmony_ci int dstStride, int srcStride, 59cabdff1aSopenharmony_ci int h); 60cabdff1aSopenharmony_civoid ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 61cabdff1aSopenharmony_ci int dstStride, int srcStride, int h); 62cabdff1aSopenharmony_civoid ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 63cabdff1aSopenharmony_ci int dstStride, int srcStride, int h); 64cabdff1aSopenharmony_civoid ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, 65cabdff1aSopenharmony_ci const uint8_t *src, 66cabdff1aSopenharmony_ci int dstStride, int srcStride, 67cabdff1aSopenharmony_ci int h); 68cabdff1aSopenharmony_civoid ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 69cabdff1aSopenharmony_ci int dstStride, int srcStride); 70cabdff1aSopenharmony_civoid ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 71cabdff1aSopenharmony_ci int dstStride, int srcStride); 72cabdff1aSopenharmony_civoid ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, 73cabdff1aSopenharmony_ci const uint8_t *src, 74cabdff1aSopenharmony_ci int dstStride, int srcStride); 75cabdff1aSopenharmony_civoid ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 76cabdff1aSopenharmony_ci int dstStride, int srcStride); 77cabdff1aSopenharmony_civoid ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, 78cabdff1aSopenharmony_ci int dstStride, int srcStride); 79cabdff1aSopenharmony_civoid ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, 80cabdff1aSopenharmony_ci const uint8_t *src, 81cabdff1aSopenharmony_ci int dstStride, int srcStride); 82cabdff1aSopenharmony_ci#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx 83cabdff1aSopenharmony_ci#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx 84cabdff1aSopenharmony_ci 85cabdff1aSopenharmony_ci#if HAVE_X86ASM 86cabdff1aSopenharmony_ci 87cabdff1aSopenharmony_ci#define ff_put_pixels16_mmxext ff_put_pixels16_mmx 88cabdff1aSopenharmony_ci#define ff_put_pixels8_mmxext ff_put_pixels8_mmx 89cabdff1aSopenharmony_ci 90cabdff1aSopenharmony_ci#define QPEL_OP(OPNAME, RND, MMX) \ 91cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \ 92cabdff1aSopenharmony_ci const uint8_t *src, \ 93cabdff1aSopenharmony_ci ptrdiff_t stride) \ 94cabdff1aSopenharmony_ci{ \ 95cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \ 96cabdff1aSopenharmony_ci} \ 97cabdff1aSopenharmony_ci \ 98cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \ 99cabdff1aSopenharmony_ci const uint8_t *src, \ 100cabdff1aSopenharmony_ci ptrdiff_t stride) \ 101cabdff1aSopenharmony_ci{ \ 102cabdff1aSopenharmony_ci uint64_t temp[8]; \ 103cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t *) temp; \ 104cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ 105cabdff1aSopenharmony_ci stride, 8); \ 106cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ 107cabdff1aSopenharmony_ci stride, stride, 8); \ 108cabdff1aSopenharmony_ci} \ 109cabdff1aSopenharmony_ci \ 110cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \ 111cabdff1aSopenharmony_ci const uint8_t *src, \ 112cabdff1aSopenharmony_ci ptrdiff_t stride) \ 113cabdff1aSopenharmony_ci{ \ 114cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \ 115cabdff1aSopenharmony_ci stride, 8); \ 116cabdff1aSopenharmony_ci} \ 117cabdff1aSopenharmony_ci \ 118cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \ 119cabdff1aSopenharmony_ci const uint8_t *src, \ 120cabdff1aSopenharmony_ci ptrdiff_t stride) \ 121cabdff1aSopenharmony_ci{ \ 122cabdff1aSopenharmony_ci uint64_t temp[8]; \ 123cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t *) temp; \ 124cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ 125cabdff1aSopenharmony_ci stride, 8); \ 126cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \ 127cabdff1aSopenharmony_ci stride, 8); \ 128cabdff1aSopenharmony_ci} \ 129cabdff1aSopenharmony_ci \ 130cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \ 131cabdff1aSopenharmony_ci const uint8_t *src, \ 132cabdff1aSopenharmony_ci ptrdiff_t stride) \ 133cabdff1aSopenharmony_ci{ \ 134cabdff1aSopenharmony_ci uint64_t temp[8]; \ 135cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t *) temp; \ 136cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 137cabdff1aSopenharmony_ci 8, stride); \ 138cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ 139cabdff1aSopenharmony_ci stride, stride, 8); \ 140cabdff1aSopenharmony_ci} \ 141cabdff1aSopenharmony_ci \ 142cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \ 143cabdff1aSopenharmony_ci const uint8_t *src, \ 144cabdff1aSopenharmony_ci ptrdiff_t stride) \ 145cabdff1aSopenharmony_ci{ \ 146cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \ 147cabdff1aSopenharmony_ci stride, stride); \ 148cabdff1aSopenharmony_ci} \ 149cabdff1aSopenharmony_ci \ 150cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \ 151cabdff1aSopenharmony_ci const uint8_t *src, \ 152cabdff1aSopenharmony_ci ptrdiff_t stride) \ 153cabdff1aSopenharmony_ci{ \ 154cabdff1aSopenharmony_ci uint64_t temp[8]; \ 155cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t *) temp; \ 156cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 157cabdff1aSopenharmony_ci 8, stride); \ 158cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\ 159cabdff1aSopenharmony_ci stride, 8); \ 160cabdff1aSopenharmony_ci} \ 161cabdff1aSopenharmony_ci \ 162cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \ 163cabdff1aSopenharmony_ci const uint8_t *src, \ 164cabdff1aSopenharmony_ci ptrdiff_t stride) \ 165cabdff1aSopenharmony_ci{ \ 166cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 167cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 64; \ 168cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 169cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 170cabdff1aSopenharmony_ci stride, 9); \ 171cabdff1aSopenharmony_ci ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ 172cabdff1aSopenharmony_ci stride, 9); \ 173cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 174cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 175cabdff1aSopenharmony_ci stride, 8, 8); \ 176cabdff1aSopenharmony_ci} \ 177cabdff1aSopenharmony_ci \ 178cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \ 179cabdff1aSopenharmony_ci const uint8_t *src, \ 180cabdff1aSopenharmony_ci ptrdiff_t stride) \ 181cabdff1aSopenharmony_ci{ \ 182cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 183cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 64; \ 184cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 185cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 186cabdff1aSopenharmony_ci stride, 9); \ 187cabdff1aSopenharmony_ci ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 188cabdff1aSopenharmony_ci stride, 9); \ 189cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 190cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 191cabdff1aSopenharmony_ci stride, 8, 8); \ 192cabdff1aSopenharmony_ci} \ 193cabdff1aSopenharmony_ci \ 194cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \ 195cabdff1aSopenharmony_ci const uint8_t *src, \ 196cabdff1aSopenharmony_ci ptrdiff_t stride) \ 197cabdff1aSopenharmony_ci{ \ 198cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 199cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 64; \ 200cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 201cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 202cabdff1aSopenharmony_ci stride, 9); \ 203cabdff1aSopenharmony_ci ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ 204cabdff1aSopenharmony_ci stride, 9); \ 205cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 206cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 207cabdff1aSopenharmony_ci stride, 8, 8); \ 208cabdff1aSopenharmony_ci} \ 209cabdff1aSopenharmony_ci \ 210cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \ 211cabdff1aSopenharmony_ci const uint8_t *src, \ 212cabdff1aSopenharmony_ci ptrdiff_t stride) \ 213cabdff1aSopenharmony_ci{ \ 214cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 215cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 64; \ 216cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 217cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 218cabdff1aSopenharmony_ci stride, 9); \ 219cabdff1aSopenharmony_ci ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 220cabdff1aSopenharmony_ci stride, 9); \ 221cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 222cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 223cabdff1aSopenharmony_ci stride, 8, 8); \ 224cabdff1aSopenharmony_ci} \ 225cabdff1aSopenharmony_ci \ 226cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \ 227cabdff1aSopenharmony_ci const uint8_t *src, \ 228cabdff1aSopenharmony_ci ptrdiff_t stride) \ 229cabdff1aSopenharmony_ci{ \ 230cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 231cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 64; \ 232cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 233cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 234cabdff1aSopenharmony_ci stride, 9); \ 235cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 236cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ 237cabdff1aSopenharmony_ci stride, 8, 8); \ 238cabdff1aSopenharmony_ci} \ 239cabdff1aSopenharmony_ci \ 240cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \ 241cabdff1aSopenharmony_ci const uint8_t *src, \ 242cabdff1aSopenharmony_ci ptrdiff_t stride) \ 243cabdff1aSopenharmony_ci{ \ 244cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 245cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 64; \ 246cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 247cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 248cabdff1aSopenharmony_ci stride, 9); \ 249cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 250cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ 251cabdff1aSopenharmony_ci stride, 8, 8); \ 252cabdff1aSopenharmony_ci} \ 253cabdff1aSopenharmony_ci \ 254cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \ 255cabdff1aSopenharmony_ci const uint8_t *src, \ 256cabdff1aSopenharmony_ci ptrdiff_t stride) \ 257cabdff1aSopenharmony_ci{ \ 258cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 259cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half; \ 260cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 261cabdff1aSopenharmony_ci stride, 9); \ 262cabdff1aSopenharmony_ci ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \ 263cabdff1aSopenharmony_ci 8, stride, 9); \ 264cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 265cabdff1aSopenharmony_ci stride, 8); \ 266cabdff1aSopenharmony_ci} \ 267cabdff1aSopenharmony_ci \ 268cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \ 269cabdff1aSopenharmony_ci const uint8_t *src, \ 270cabdff1aSopenharmony_ci ptrdiff_t stride) \ 271cabdff1aSopenharmony_ci{ \ 272cabdff1aSopenharmony_ci uint64_t half[8 + 9]; \ 273cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half; \ 274cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 275cabdff1aSopenharmony_ci stride, 9); \ 276cabdff1aSopenharmony_ci ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ 277cabdff1aSopenharmony_ci stride, 9); \ 278cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 279cabdff1aSopenharmony_ci stride, 8); \ 280cabdff1aSopenharmony_ci} \ 281cabdff1aSopenharmony_ci \ 282cabdff1aSopenharmony_cistatic void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \ 283cabdff1aSopenharmony_ci const uint8_t *src, \ 284cabdff1aSopenharmony_ci ptrdiff_t stride) \ 285cabdff1aSopenharmony_ci{ \ 286cabdff1aSopenharmony_ci uint64_t half[9]; \ 287cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half; \ 288cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ 289cabdff1aSopenharmony_ci stride, 9); \ 290cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ 291cabdff1aSopenharmony_ci stride, 8); \ 292cabdff1aSopenharmony_ci} \ 293cabdff1aSopenharmony_ci \ 294cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \ 295cabdff1aSopenharmony_ci const uint8_t *src, \ 296cabdff1aSopenharmony_ci ptrdiff_t stride) \ 297cabdff1aSopenharmony_ci{ \ 298cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \ 299cabdff1aSopenharmony_ci} \ 300cabdff1aSopenharmony_ci \ 301cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \ 302cabdff1aSopenharmony_ci const uint8_t *src, \ 303cabdff1aSopenharmony_ci ptrdiff_t stride) \ 304cabdff1aSopenharmony_ci{ \ 305cabdff1aSopenharmony_ci uint64_t temp[32]; \ 306cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t *) temp; \ 307cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ 308cabdff1aSopenharmony_ci stride, 16); \ 309cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ 310cabdff1aSopenharmony_ci stride, 16); \ 311cabdff1aSopenharmony_ci} \ 312cabdff1aSopenharmony_ci \ 313cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \ 314cabdff1aSopenharmony_ci const uint8_t *src, \ 315cabdff1aSopenharmony_ci ptrdiff_t stride) \ 316cabdff1aSopenharmony_ci{ \ 317cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \ 318cabdff1aSopenharmony_ci stride, stride, 16);\ 319cabdff1aSopenharmony_ci} \ 320cabdff1aSopenharmony_ci \ 321cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \ 322cabdff1aSopenharmony_ci const uint8_t *src, \ 323cabdff1aSopenharmony_ci ptrdiff_t stride) \ 324cabdff1aSopenharmony_ci{ \ 325cabdff1aSopenharmony_ci uint64_t temp[32]; \ 326cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t*) temp; \ 327cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ 328cabdff1aSopenharmony_ci stride, 16); \ 329cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \ 330cabdff1aSopenharmony_ci stride, stride, 16); \ 331cabdff1aSopenharmony_ci} \ 332cabdff1aSopenharmony_ci \ 333cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \ 334cabdff1aSopenharmony_ci const uint8_t *src, \ 335cabdff1aSopenharmony_ci ptrdiff_t stride) \ 336cabdff1aSopenharmony_ci{ \ 337cabdff1aSopenharmony_ci uint64_t temp[32]; \ 338cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t *) temp; \ 339cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ 340cabdff1aSopenharmony_ci stride); \ 341cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ 342cabdff1aSopenharmony_ci stride, 16); \ 343cabdff1aSopenharmony_ci} \ 344cabdff1aSopenharmony_ci \ 345cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \ 346cabdff1aSopenharmony_ci const uint8_t *src, \ 347cabdff1aSopenharmony_ci ptrdiff_t stride) \ 348cabdff1aSopenharmony_ci{ \ 349cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \ 350cabdff1aSopenharmony_ci stride, stride); \ 351cabdff1aSopenharmony_ci} \ 352cabdff1aSopenharmony_ci \ 353cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \ 354cabdff1aSopenharmony_ci const uint8_t *src, \ 355cabdff1aSopenharmony_ci ptrdiff_t stride) \ 356cabdff1aSopenharmony_ci{ \ 357cabdff1aSopenharmony_ci uint64_t temp[32]; \ 358cabdff1aSopenharmony_ci uint8_t *const half = (uint8_t *) temp; \ 359cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ 360cabdff1aSopenharmony_ci stride); \ 361cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \ 362cabdff1aSopenharmony_ci stride, stride, 16); \ 363cabdff1aSopenharmony_ci} \ 364cabdff1aSopenharmony_ci \ 365cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \ 366cabdff1aSopenharmony_ci const uint8_t *src, \ 367cabdff1aSopenharmony_ci ptrdiff_t stride) \ 368cabdff1aSopenharmony_ci{ \ 369cabdff1aSopenharmony_ci uint64_t half[16 * 2 + 17 * 2]; \ 370cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 256; \ 371cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 372cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 373cabdff1aSopenharmony_ci stride, 17); \ 374cabdff1aSopenharmony_ci ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 375cabdff1aSopenharmony_ci stride, 17); \ 376cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 377cabdff1aSopenharmony_ci 16, 16); \ 378cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 379cabdff1aSopenharmony_ci stride, 16, 16); \ 380cabdff1aSopenharmony_ci} \ 381cabdff1aSopenharmony_ci \ 382cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \ 383cabdff1aSopenharmony_ci const uint8_t *src, \ 384cabdff1aSopenharmony_ci ptrdiff_t stride) \ 385cabdff1aSopenharmony_ci{ \ 386cabdff1aSopenharmony_ci uint64_t half[16 * 2 + 17 * 2]; \ 387cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 256; \ 388cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 389cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 390cabdff1aSopenharmony_ci stride, 17); \ 391cabdff1aSopenharmony_ci ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 392cabdff1aSopenharmony_ci stride, 17); \ 393cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 394cabdff1aSopenharmony_ci 16, 16); \ 395cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 396cabdff1aSopenharmony_ci stride, 16, 16); \ 397cabdff1aSopenharmony_ci} \ 398cabdff1aSopenharmony_ci \ 399cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \ 400cabdff1aSopenharmony_ci const uint8_t *src, \ 401cabdff1aSopenharmony_ci ptrdiff_t stride) \ 402cabdff1aSopenharmony_ci{ \ 403cabdff1aSopenharmony_ci uint64_t half[16 * 2 + 17 * 2]; \ 404cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 256; \ 405cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 406cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 407cabdff1aSopenharmony_ci stride, 17); \ 408cabdff1aSopenharmony_ci ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 409cabdff1aSopenharmony_ci stride, 17); \ 410cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 411cabdff1aSopenharmony_ci 16, 16); \ 412cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 413cabdff1aSopenharmony_ci stride, 16, 16); \ 414cabdff1aSopenharmony_ci} \ 415cabdff1aSopenharmony_ci \ 416cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \ 417cabdff1aSopenharmony_ci const uint8_t *src, \ 418cabdff1aSopenharmony_ci ptrdiff_t stride) \ 419cabdff1aSopenharmony_ci{ \ 420cabdff1aSopenharmony_ci uint64_t half[16 * 2 + 17 * 2]; \ 421cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 256; \ 422cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 423cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 424cabdff1aSopenharmony_ci stride, 17); \ 425cabdff1aSopenharmony_ci ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 426cabdff1aSopenharmony_ci stride, 17); \ 427cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 428cabdff1aSopenharmony_ci 16, 16); \ 429cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 430cabdff1aSopenharmony_ci stride, 16, 16); \ 431cabdff1aSopenharmony_ci} \ 432cabdff1aSopenharmony_ci \ 433cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \ 434cabdff1aSopenharmony_ci const uint8_t *src, \ 435cabdff1aSopenharmony_ci ptrdiff_t stride) \ 436cabdff1aSopenharmony_ci{ \ 437cabdff1aSopenharmony_ci uint64_t half[16 * 2 + 17 * 2]; \ 438cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 256; \ 439cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 440cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 441cabdff1aSopenharmony_ci stride, 17); \ 442cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 443cabdff1aSopenharmony_ci 16, 16); \ 444cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ 445cabdff1aSopenharmony_ci stride, 16, 16); \ 446cabdff1aSopenharmony_ci} \ 447cabdff1aSopenharmony_ci \ 448cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \ 449cabdff1aSopenharmony_ci const uint8_t *src, \ 450cabdff1aSopenharmony_ci ptrdiff_t stride) \ 451cabdff1aSopenharmony_ci{ \ 452cabdff1aSopenharmony_ci uint64_t half[16 * 2 + 17 * 2]; \ 453cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half + 256; \ 454cabdff1aSopenharmony_ci uint8_t *const halfHV = (uint8_t *) half; \ 455cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 456cabdff1aSopenharmony_ci stride, 17); \ 457cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 458cabdff1aSopenharmony_ci 16, 16); \ 459cabdff1aSopenharmony_ci ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ 460cabdff1aSopenharmony_ci stride, 16, 16); \ 461cabdff1aSopenharmony_ci} \ 462cabdff1aSopenharmony_ci \ 463cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \ 464cabdff1aSopenharmony_ci const uint8_t *src, \ 465cabdff1aSopenharmony_ci ptrdiff_t stride) \ 466cabdff1aSopenharmony_ci{ \ 467cabdff1aSopenharmony_ci uint64_t half[17 * 2]; \ 468cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half; \ 469cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 470cabdff1aSopenharmony_ci stride, 17); \ 471cabdff1aSopenharmony_ci ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ 472cabdff1aSopenharmony_ci stride, 17); \ 473cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 474cabdff1aSopenharmony_ci stride, 16); \ 475cabdff1aSopenharmony_ci} \ 476cabdff1aSopenharmony_ci \ 477cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \ 478cabdff1aSopenharmony_ci const uint8_t *src, \ 479cabdff1aSopenharmony_ci ptrdiff_t stride) \ 480cabdff1aSopenharmony_ci{ \ 481cabdff1aSopenharmony_ci uint64_t half[17 * 2]; \ 482cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half; \ 483cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 484cabdff1aSopenharmony_ci stride, 17); \ 485cabdff1aSopenharmony_ci ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ 486cabdff1aSopenharmony_ci stride, 17); \ 487cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 488cabdff1aSopenharmony_ci stride, 16); \ 489cabdff1aSopenharmony_ci} \ 490cabdff1aSopenharmony_ci \ 491cabdff1aSopenharmony_cistatic void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \ 492cabdff1aSopenharmony_ci const uint8_t *src, \ 493cabdff1aSopenharmony_ci ptrdiff_t stride) \ 494cabdff1aSopenharmony_ci{ \ 495cabdff1aSopenharmony_ci uint64_t half[17 * 2]; \ 496cabdff1aSopenharmony_ci uint8_t *const halfH = (uint8_t *) half; \ 497cabdff1aSopenharmony_ci ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ 498cabdff1aSopenharmony_ci stride, 17); \ 499cabdff1aSopenharmony_ci ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ 500cabdff1aSopenharmony_ci stride, 16); \ 501cabdff1aSopenharmony_ci} 502cabdff1aSopenharmony_ci 503cabdff1aSopenharmony_ciQPEL_OP(put_, _, mmxext) 504cabdff1aSopenharmony_ciQPEL_OP(avg_, _, mmxext) 505cabdff1aSopenharmony_ciQPEL_OP(put_no_rnd_, _no_rnd_, mmxext) 506cabdff1aSopenharmony_ci 507cabdff1aSopenharmony_ci#endif /* HAVE_X86ASM */ 508cabdff1aSopenharmony_ci 509cabdff1aSopenharmony_ci#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ 510cabdff1aSopenharmony_cido { \ 511cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ 512cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ 513cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ 514cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ 515cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ 516cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ 517cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ 518cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ 519cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ 520cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ 521cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ 522cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ 523cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ 524cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ 525cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ 526cabdff1aSopenharmony_ci c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ 527cabdff1aSopenharmony_ci} while (0) 528cabdff1aSopenharmony_ci 529cabdff1aSopenharmony_ciav_cold void ff_qpeldsp_init_x86(QpelDSPContext *c) 530cabdff1aSopenharmony_ci{ 531cabdff1aSopenharmony_ci int cpu_flags = av_get_cpu_flags(); 532cabdff1aSopenharmony_ci 533cabdff1aSopenharmony_ci if (X86_MMXEXT(cpu_flags)) { 534cabdff1aSopenharmony_ci#if HAVE_MMXEXT_EXTERNAL 535cabdff1aSopenharmony_ci SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); 536cabdff1aSopenharmony_ci SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); 537cabdff1aSopenharmony_ci 538cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ); 539cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ); 540cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); 541cabdff1aSopenharmony_ci SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); 542cabdff1aSopenharmony_ci#endif /* HAVE_MMXEXT_EXTERNAL */ 543cabdff1aSopenharmony_ci } 544cabdff1aSopenharmony_ci} 545