;******************************************************************************
;* mpeg4 qpel
;* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA
cextern pb_1
cextern pw_3
cextern pw_15
cextern pw_16
cextern pw_20


SECTION .text

;------------------------------------------------------------------------------
; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
;
; Averages 8-byte rows of src1 and src2 into dst.
;   r0 = dst, r1 = src1, r2 = src2, r3 = dstStride, r4 = src1Stride, r5 = h
; src1 advances by src1Stride per row; src2 is read as densely packed rows
; (fixed step of 8 bytes per row).
; Both inputs are bit-inverted with m6 (all ones), averaged with PAVGB and the
; result is inverted again.  With a round-up byte average (pavgb) this yields
; the round-down average, i.e. the "no_rnd" variant.
; If h is odd one row is handled up front; the main loop then consumes four
; rows per iteration and only terminates when the counter reaches exactly
; zero, so the remaining row count must be a non-zero multiple of 4.
;------------------------------------------------------------------------------
%macro PUT_NO_RND_PIXELS8_L2 0
cglobal put_no_rnd_pixels8_l2, 6,6
    movsxdifnidn r4, r4d
    movsxdifnidn r3, r3d
    pcmpeqb      m6, m6             ; m6 = all ones (inversion mask)
    test        r5d, 1
    je .loop
    ; odd h: process a single leading row
    mova         m0, [r1]
    mova         m1, [r2]
    add          r1, r4
    add          r2, 8
    pxor         m0, m6
    pxor         m1, m6
    PAVGB        m0, m1
    pxor         m0, m6
    mova       [r0], m0
    add          r0, r3
    dec         r5d
.loop:
    ; rows 0 and 1 of this group of four
    mova         m0, [r1]
    add          r1, r4
    mova         m1, [r1]
    add          r1, r4
    mova         m2, [r2]
    mova         m3, [r2+8]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    mova       [r0], m0
    add          r0, r3
    mova       [r0], m1
    add          r0, r3
    ; rows 2 and 3 of this group of four
    mova         m0, [r1]
    add          r1, r4
    mova         m1, [r1]
    add          r1, r4
    mova         m2, [r2+16]
    mova         m3, [r2+24]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    mova       [r0], m0
    add          r0, r3
    mova       [r0], m1
    add          r0, r3
    add          r2, 32
    sub         r5d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_L2


;------------------------------------------------------------------------------
; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
;
; 16-byte wide variant of the routine above; each row is handled as two
; 8-byte halves.
;   r0 = dst, r1 = src1, r2 = src2, r3 = dstStride, r4 = src1Stride, r5 = h
; src2 is read as densely packed rows (fixed step of 16 bytes per row).
; If h is odd one row is handled up front; the main loop then consumes two
; rows per iteration and only terminates when the counter reaches exactly
; zero, so the remaining row count must be a non-zero multiple of 2.
;------------------------------------------------------------------------------
%macro PUT_NO_RND_PIXELS16_l2 0
cglobal put_no_rnd_pixels16_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    pcmpeqb      m6, m6             ; m6 = all ones (inversion mask)
    test        r5d, 1
    je .loop
    ; odd h: process a single leading row
    mova         m0, [r1]
    mova         m1, [r1+8]
    mova         m2, [r2]
    mova         m3, [r2+8]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    add          r1, r4
    add          r2, 16
    mova       [r0], m0
    mova     [r0+8], m1
    add          r0, r3
    dec         r5d
.loop:
    ; first row of the pair
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    mova         m2, [r2]
    mova         m3, [r2+8]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    mova       [r0], m0
    mova     [r0+8], m1
    add          r0, r3
    ; second row of the pair
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    mova         m2, [r2+16]
    mova         m3, [r2+24]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    mova       [r0], m0
    mova     [r0+8], m1
    add          r0, r3
    add          r2, 32
    sub         r5d, 2
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS16_l2

;------------------------------------------------------------------------------
; MPEG4_QPEL16_H_LOWPASS prefix
;
; Emits <prefix>_mpeg4_qpel16_h_lowpass: horizontal low-pass over 16-pixel rows.
;   r0 = dst, r1 = src, r2 = dst stride, r3 = src stride, r4 = row count
; Each iteration filters one row.  The row is produced as four groups of four
; word-sized sums built from the pw_20 and pw_3 weights, PW_ROUND is added,
; the sums are shifted right by 5, packed with unsigned saturation and written
; through OP_MOV to [r0] and [r0+8].
; The source is loaded as 8-byte chunks at r1, r1+5 and r1+9, i.e. 17 bytes
; of every source row are read.
; 16 bytes of stack are reserved; [rsp+8] parks the first four results while
; the next four are computed.
; PW_ROUND and OP_MOV must be defined before the macro is instantiated.
; NOTE(review): the pshufw shuffles appear to build the reflected neighbours
; needed at the left and right block edges - confirm against the C reference.
;------------------------------------------------------------------------------
%macro MPEG4_QPEL16_H_LOWPASS 1
cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
    movsxdifnidn r2, r2d
    movsxdifnidn r3, r3d
    pxor         m7, m7             ; m7 = 0, used to widen bytes to words
.loop:
    ; ---- output pixels 0..3 ----
    mova         m0, [r1]
    mova         m1, m0
    mova         m2, m0
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    pshufw       m5, m0, 0x90
    pshufw       m6, m0, 0x41
    mova         m3, m2
    mova         m4, m2
    psllq        m2, 8
    psllq        m3, 16
    psllq        m4, 24
    punpckhbw    m2, m7
    punpckhbw    m3, m7
    punpckhbw    m4, m7
    paddw        m5, m3
    paddw        m6, m2
    paddw        m5, m5
    psubw        m6, m5
    pshufw       m5, m0, 6
    pmullw       m6, [pw_3]
    paddw        m0, m4
    paddw        m5, m1
    pmullw       m0, [pw_20]
    psubw        m0, m5
    paddw        m6, [PW_ROUND]
    paddw        m0, m6
    psraw        m0, 5
    mova    [rsp+8], m0             ; park results 0..3 until 4..7 are ready
    ; ---- output pixels 4..7 ----
    mova         m0, [r1+5]
    mova         m5, m0
    mova         m6, m0
    psrlq        m0, 8
    psrlq        m5, 16
    punpcklbw    m0, m7
    punpcklbw    m5, m7
    paddw        m2, m0
    paddw        m3, m5
    paddw        m2, m2
    psubw        m3, m2
    mova         m2, m6
    psrlq        m6, 24
    punpcklbw    m2, m7
    punpcklbw    m6, m7
    pmullw       m3, [pw_3]
    paddw        m1, m2
    paddw        m4, m6
    pmullw       m1, [pw_20]
    psubw        m3, m4
    paddw        m1, [PW_ROUND]
    paddw        m3, m1
    psraw        m3, 5
    mova         m1, [rsp+8]
    packuswb     m1, m3
    OP_MOV     [r0], m1, m4         ; store left 8 pixels (m4 is scratch)
    ; ---- output pixels 8..11 ----
    mova         m1, [r1+9]
    mova         m4, m1
    mova         m3, m1
    psrlq        m1, 8
    psrlq        m4, 16
    punpcklbw    m1, m7
    punpcklbw    m4, m7
    paddw        m5, m1
    paddw        m0, m4
    paddw        m5, m5
    psubw        m0, m5
    mova         m5, m3
    psrlq        m3, 24
    pmullw       m0, [pw_3]
    punpcklbw    m3, m7
    paddw        m2, m3
    psubw        m0, m2
    mova         m2, m5
    punpcklbw    m2, m7
    punpckhbw    m5, m7
    paddw        m6, m2
    pmullw       m6, [pw_20]
    paddw        m0, [PW_ROUND]
    paddw        m0, m6
    psraw        m0, 5
    ; ---- output pixels 12..15 ----
    paddw        m3, m5
    pshufw       m6, m5, 0xf9
    paddw        m6, m4
    pshufw       m4, m5, 0xbe
    pshufw       m5, m5, 0x6f
    paddw        m4, m1
    paddw        m5, m2
    paddw        m6, m6
    psubw        m4, m6
    pmullw       m3, [pw_20]
    pmullw       m4, [pw_3]
    psubw        m3, m5
    paddw        m4, [PW_ROUND]
    paddw        m4, m3
    psraw        m4, 5
    packuswb     m0, m4
    OP_MOV   [r0+8], m0, m4         ; store right 8 pixels (m4 is scratch)
    add          r1, r3
    add          r0, r2
    dec         r4d
    jne .loop
    REP_RET
%endmacro

; Store policy used by the horizontal filters: plain 8-byte store.
; The optional third argument (scratch register) is ignored.
%macro PUT_OP 2-3
    mova         %1, %2
%endmacro

; Store policy used by the horizontal filters: average the new value with the
; existing destination contents (pavgb) and store the result.  The third
; argument is a scratch register and is clobbered.
%macro AVG_OP 2-3
    mova         %3, %1
    pavgb        %2, %3
    mova         %1, %2
%endmacro

; put / avg use the pw_16 rounding constant, put_no_rnd uses pw_15.
INIT_MMX mmxext
%define PW_ROUND pw_16
%define OP_MOV PUT_OP
MPEG4_QPEL16_H_LOWPASS put
%define PW_ROUND pw_16
%define OP_MOV AVG_OP
MPEG4_QPEL16_H_LOWPASS avg
%define PW_ROUND pw_15
%define OP_MOV PUT_OP
MPEG4_QPEL16_H_LOWPASS put_no_rnd



;------------------------------------------------------------------------------
; MPEG4_QPEL8_H_LOWPASS prefix
;
; Emits <prefix>_mpeg4_qpel8_h_lowpass: horizontal low-pass over 8-pixel rows.
;   r0 = dst, r1 = src, r2 = dst stride, r3 = src stride, r4 = row count
; Same arithmetic as the 16-pixel version, producing two groups of four
; results that are packed into one 8-byte store through OP_MOV.
; The source is loaded with an 8-byte mova at r1 and a movh at r1+5.
; 8 bytes of stack are reserved by cglobal; the visible body does not
; reference them.
; PW_ROUND and OP_MOV must be defined before the macro is instantiated.
;------------------------------------------------------------------------------
%macro MPEG4_QPEL8_H_LOWPASS 1
cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
    movsxdifnidn r2, r2d
    movsxdifnidn r3, r3d
    pxor         m7, m7             ; m7 = 0, used to widen bytes to words
.loop:
    ; ---- output pixels 0..3 ----
    mova         m0, [r1]
    mova         m1, m0
    mova         m2, m0
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    pshufw       m5, m0, 0x90
    pshufw       m6, m0, 0x41
    mova         m3, m2
    mova         m4, m2
    psllq        m2, 8
    psllq        m3, 16
    psllq        m4, 24
    punpckhbw    m2, m7
    punpckhbw    m3, m7
    punpckhbw    m4, m7
    paddw        m5, m3
    paddw        m6, m2
    paddw        m5, m5
    psubw        m6, m5
    pshufw       m5, m0, 0x6
    pmullw       m6, [pw_3]
    paddw        m0, m4
    paddw        m5, m1
    pmullw       m0, [pw_20]
    psubw        m0, m5
    paddw        m6, [PW_ROUND]
    paddw        m0, m6
    psraw        m0, 5
    ; ---- output pixels 4..7 ----
    movh         m5, [r1+5]
    punpcklbw    m5, m7
    pshufw       m6, m5, 0xf9
    paddw        m1, m5
    paddw        m2, m6
    pshufw       m6, m5, 0xbe
    pshufw       m5, m5, 0x6f
    paddw        m3, m6
    paddw        m4, m5
    paddw        m2, m2
    psubw        m3, m2
    pmullw       m1, [pw_20]
    pmullw       m3, [pw_3]
    psubw        m3, m4
    paddw        m1, [PW_ROUND]
    paddw        m3, m1
    psraw        m3, 5
    packuswb     m0, m3
    OP_MOV     [r0], m0, m4         ; store 8 pixels (m4 is scratch)
    add          r1, r3
    add          r0, r2
    dec         r4d
    jne .loop
    REP_RET
%endmacro

; put / avg use the pw_16 rounding constant, put_no_rnd uses pw_15.
INIT_MMX mmxext
%define PW_ROUND pw_16
%define OP_MOV PUT_OP
MPEG4_QPEL8_H_LOWPASS put
%define PW_ROUND pw_16
%define OP_MOV AVG_OP
MPEG4_QPEL8_H_LOWPASS avg
%define PW_ROUND pw_15
%define OP_MOV PUT_OP
MPEG4_QPEL8_H_LOWPASS put_no_rnd



;------------------------------------------------------------------------------
; QPEL_V_LOW a1, a2, a3, a4, dst
;
; One output row of the vertical filter for a column group of four pixels.
; On entry m0..m3 hold four consecutive rows (as words); a1..a4 are memory
; operands for the remaining rows and dst is the destination operand.
; Computes
;   (20*(m0 + m1) - 6*(a3 + m2) + 3*(a2 + m3) - (a1 + a4) + PW_ROUND) >> 5
; packs the result with unsigned saturation and writes it through OP_MOV.
; m0 is reloaded with a4, then SWAP renames m0..m3 so that the next invocation
; sees the window advanced by one row.
; Clobbers m4, m5, m6; m7 is handed to OP_MOV as scratch.
;------------------------------------------------------------------------------
%macro QPEL_V_LOW 5
    paddw        m0, m1
    mova         m4, [pw_20]
    pmullw       m4, m0             ; m4 = 20 * (m0 + m1)
    mova         m0, %4             ; m0 = newest row, kept for later calls
    mova         m5, %1
    paddw        m5, m0
    psubw        m4, m5             ; m4 -= outermost pair
    mova         m5, %2
    mova         m6, %3
    paddw        m5, m3
    paddw        m6, m2
    paddw        m6, m6
    psubw        m5, m6
    pmullw       m5, [pw_3]         ; m5 = 3 * (pair - 2 * pair)
    paddw        m4, [PW_ROUND]
    paddw        m5, m4
    psraw        m5, 5
    packuswb     m5, m5
    OP_MOV       %5, m5, m7
    SWAP 0,1,2,3
%endmacro

;------------------------------------------------------------------------------
; MPEG4_QPEL16_V_LOWPASS prefix
;
; Emits <prefix>_mpeg4_qpel16_v_lowpass: vertical low-pass producing 16 rows
; of 16 pixels.
;   r0 = dst, r1 = src, r2 = dst stride, r3 = src stride
;   r4 = loop counter, r5 = cursor into the stack buffer
; Phase 1 (.looph): 17 source rows of 16 bytes are widened to words and stored
; on the stack as four column groups of four pixels.  Each group occupies
; 17 * 8 = 0x88 bytes, 4 * 0x88 = 544 bytes in total.
; Phase 2 (.loopv): for each of the four column groups, 16 output rows are
; produced with QPEL_V_LOW.  The first and last invocations pass repeated or
; reversed row addresses for the taps that fall outside the 17 stored rows.
; PW_ROUND and OP_MOV must be defined before the macro is instantiated.
;------------------------------------------------------------------------------
%macro MPEG4_QPEL16_V_LOWPASS 1
cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
    movsxdifnidn r2, r2d
    movsxdifnidn r3, r3d

    mov         r4d, 17
    mov          r5, rsp
    pxor         m7, m7
.looph:
    mova         m0, [r1]
    mova         m1, [r1]
    mova         m2, [r1+8]
    mova         m3, [r1+8]
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    mova       [r5], m0
    mova  [r5+0x88], m1
    mova [r5+0x110], m2
    mova [r5+0x198], m3
    add          r5, 8
    add          r1, r3
    dec         r4d
    jne .looph


    ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 14*dstStride
    ; (added to r0 after each column group: back to the first row, 4 pixels right)
    mov         r4d, 4
    mov          r1, 4
    neg          r2
    lea          r1, [r1+r2*8]
    lea          r1, [r1+r2*4]
    lea          r1, [r1+r2*2]
    neg          r2
    mov          r5, rsp
.loopv:
    pxor         m7, m7
    mova         m0, [r5+ 0x0]
    mova         m1, [r5+ 0x8]
    mova         m2, [r5+0x10]
    mova         m3, [r5+0x18]
    QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0]
    QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0]
    QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x48], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x50], [r0]
    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x58], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x28], [r5+0x30], [r5+0x38], [r5+0x60], [r0]
    QPEL_V_LOW [r5+0x30], [r5+0x38], [r5+0x40], [r5+0x68], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x38], [r5+0x40], [r5+0x48], [r5+0x70], [r0]
    QPEL_V_LOW [r5+0x40], [r5+0x48], [r5+0x50], [r5+0x78], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x48], [r5+0x50], [r5+0x58], [r5+0x80], [r0]
    QPEL_V_LOW [r5+0x50], [r5+0x58], [r5+0x60], [r5+0x80], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x58], [r5+0x60], [r5+0x68], [r5+0x78], [r0]
    QPEL_V_LOW [r5+0x60], [r5+0x68], [r5+0x70], [r5+0x70], [r0+r2]

    add          r5, 0x88           ; next column group in the stack buffer
    add          r0, r1             ; back to row 0, four pixels to the right
    dec         r4d
    jne .loopv
    REP_RET
%endmacro

; Store policy used by the vertical filters: movh store of one column group.
; The optional third argument (scratch register) is ignored.
%macro PUT_OPH 2-3
    movh         %1, %2
%endmacro

; Store policy used by the vertical filters: average the new value with the
; existing destination contents (pavgb) and store it with movh.  The third
; argument is a scratch register and is clobbered.
%macro AVG_OPH 2-3
    movh         %3, %1
    pavgb        %2, %3
    movh         %1, %2
%endmacro

; put / avg use the pw_16 rounding constant, put_no_rnd uses pw_15.
INIT_MMX mmxext
%define PW_ROUND pw_16
%define OP_MOV PUT_OPH
MPEG4_QPEL16_V_LOWPASS put
%define PW_ROUND pw_16
%define OP_MOV AVG_OPH
MPEG4_QPEL16_V_LOWPASS avg
%define PW_ROUND pw_15
%define OP_MOV PUT_OPH
MPEG4_QPEL16_V_LOWPASS put_no_rnd



;------------------------------------------------------------------------------
; MPEG4_QPEL8_V_LOWPASS prefix
;
; Emits <prefix>_mpeg4_qpel8_v_lowpass: vertical low-pass producing 8 rows of
; 8 pixels.
;   r0 = dst, r1 = src, r2 = dst stride, r3 = src stride
;   r4 = loop counter, r5 = cursor into the stack buffer
; Phase 1 (.looph): 9 source rows of 8 bytes are widened to words and stored
; on the stack as two column groups of four pixels, 9 * 8 = 0x48 bytes each.
; Phase 2 (.loopv): for each of the two column groups, 8 output rows are
; produced with QPEL_V_LOW, using repeated or reversed row addresses for the
; taps that fall outside the 9 stored rows.
; NOTE(review): cglobal reserves 288 bytes of stack while the visible code
; addresses 2 * 0x48 = 144 bytes - confirm whether the extra space is needed.
; PW_ROUND and OP_MOV must be defined before the macro is instantiated.
;------------------------------------------------------------------------------
%macro MPEG4_QPEL8_V_LOWPASS 1
cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
    movsxdifnidn r2, r2d
    movsxdifnidn r3, r3d

    mov         r4d, 9
    mov          r5, rsp
    pxor         m7, m7
.looph:
    mova         m0, [r1]
    mova         m1, [r1]
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    mova       [r5], m0
    mova  [r5+0x48], m1
    add          r5, 8
    add          r1, r3
    dec         r4d
    jne .looph


    ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 6*dstStride
    ; (added to r0 after each column group: back to the first row, 4 pixels right)
    mov         r4d, 2
    mov          r1, 4
    neg          r2
    lea          r1, [r1+r2*4]
    lea          r1, [r1+r2*2]
    neg          r2
    mov          r5, rsp
.loopv:
    pxor         m7, m7
    mova         m0, [r5+ 0x0]
    mova         m1, [r5+ 0x8]
    mova         m2, [r5+0x10]
    mova         m3, [r5+0x18]
    QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0]
    QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0]
    QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x40], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x38], [r0]
    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x30], [r0+r2]

    add          r5, 0x48           ; next column group in the stack buffer
    add          r0, r1             ; back to row 0, four pixels to the right
    dec         r4d
    jne .loopv
    REP_RET
%endmacro

; put / avg use the pw_16 rounding constant, put_no_rnd uses pw_15.
INIT_MMX mmxext
%define PW_ROUND pw_16
%define OP_MOV PUT_OPH
MPEG4_QPEL8_V_LOWPASS put
%define PW_ROUND pw_16
%define OP_MOV AVG_OPH
MPEG4_QPEL8_V_LOWPASS avg
%define PW_ROUND pw_15
%define OP_MOV PUT_OPH
MPEG4_QPEL8_V_LOWPASS put_no_rnd