;******************************************************************************
;* SIMD-optimized quarterpel functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;------------------------------------------------------------------------------
; Store operations.
;
; Each PIXELS*_L2 macro below binds OP to one of these, so that the same loop
; body yields both the "put" (overwrite dst) and the "avg" (blend with dst)
; variant of a function.
;------------------------------------------------------------------------------

; op_avgh value, dst, tmp
;   Partial-width "avg" store: fetch the current destination into the scratch
;   register %3 with movh, average it into %1 with pavgb, then write %1 back
;   with movh. %1 and %3 are both clobbered.
%macro op_avgh 3
    movh   %3, %2
    pavgb  %1, %3
    movh   %2, %1
%endmacro

; op_avg value, dst
;   Full-width "avg" store: average %1 with the destination (memory operand of
;   pavgb) and write the result back with mova. %1 is clobbered.
%macro op_avg 2
    pavgb  %1, %2
    mova   %2, %1
%endmacro

; op_puth value, dst [, tmp]
;   Partial-width "put" store. The optional third argument is accepted only so
;   that call sites can be written identically for op_puth and op_avgh; it is
;   not used.
%macro op_puth 2-3
    movh   %2, %1
%endmacro

; op_put value, dst
;   Full-width "put" store.
%macro op_put 2
    mova   %2, %1
%endmacro

;------------------------------------------------------------------------------
; void ff_put/avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
;                                   int dstStride, int src1Stride, int h)
;
; For every row: dst = OP(pavgb(src1, src2)).
;
; Register use: r0 = dst, r1 = src1, r2 = src2, r3 = dstStride,
;               r4 = src1Stride, r5d = h (remaining rows).
;
; src2 is read as a packed buffer: consecutive rows are 4 bytes apart
; (offsets 0/4/8/12, pointer advanced by 16 per loop iteration); no stride
; argument applies to it.
;
; Row-count precondition (follows from the control flow): when h is odd a
; single row is processed up front, then the main loop handles 4 rows per
; iteration and only tests the counter at the bottom (sub/jne). The count
; entering .loop must therefore be a positive multiple of 4; h == 0, h == 1 or
; any other remainder never reaches exactly zero.
;
; NOTE(review): the main loop loads with mova and uses pavgb with a memory
; operand, which under INIT_MMX are presumably 8-byte accesses even though
; only the movh-sized part is stored -- confirm that callers provide readable
; bytes past the 4-pixel row on both sources.
;------------------------------------------------------------------------------
%macro PIXELS4_L2 1
%define OP op_%1h
cglobal %1_pixels4_l2, 6,6
    movsxdifnidn r3, r3d            ; widen the int strides to pointer width
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    ; odd h: peel one row so the unrolled loop sees an even count
    movd         m0, [r1]
    movd         m1, [r2]
    add          r1, r4
    add          r2, 4
    pavgb        m0, m1
    OP           m0, [r0], m3
    add          r0, r3
    dec         r5d
.loop:
    ; rows 0 and 1 of this iteration
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2]
    pavgb        m1, [r2+4]
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]
    ; rows 2 and 3 of this iteration
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2+8]
    pavgb        m1, [r2+12]
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]
    add          r2, 16
    sub         r5d, 4
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS4_L2 put
PIXELS4_L2 avg

;------------------------------------------------------------------------------
; void ff_put/avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
;                                   int dstStride, int src1Stride, int h)
;
; Same scheme as the 4-pixel version, one full mova-sized register per row.
;
; Register use: r0 = dst, r1 = src1, r2 = src2, r3 = dstStride,
;               r4 = src1Stride, r5d = h (remaining rows).
;
; src2 is read as a packed buffer with rows 8 bytes apart (offsets
; 0/8/16/24, pointer advanced by 32 per loop iteration).
;
; Row-count precondition: after the optional single-row peel for odd h, the
; count entering .loop must be a positive multiple of 4, because the loop is
; bottom-tested with sub 4 / jne.
;------------------------------------------------------------------------------
%macro PIXELS8_L2 1
%define OP op_%1
cglobal %1_pixels8_l2, 6,6
    movsxdifnidn r3, r3d            ; widen the int strides to pointer width
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    ; odd h: peel one row so the unrolled loop sees an even count
    mova         m0, [r1]
    mova         m1, [r2]
    add          r1, r4
    add          r2, 8
    pavgb        m0, m1
    OP           m0, [r0]
    add          r0, r3
    dec         r5d
.loop:
    ; rows 0 and 1 of this iteration
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2]
    pavgb        m1, [r2+8]
    OP           m0, [r0]
    OP           m1, [r0+r3]
    lea          r0, [r0+2*r3]
    ; rows 2 and 3 of this iteration
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2+16]
    pavgb        m1, [r2+24]
    OP           m0, [r0]
    OP           m1, [r0+r3]
    lea          r0, [r0+2*r3]
    add          r2, 32
    sub         r5d, 4
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS8_L2 put
PIXELS8_L2 avg

;------------------------------------------------------------------------------
; void ff_put/avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
;                                    int dstStride, int src1Stride, int h)
;
; Each row is handled as two halves, at byte offsets +0 and +8.
;
; Register use: r0 = dst, r1 = src1, r2 = src2, r3 = dstStride,
;               r4 = src1Stride, r5d = h (remaining rows).
;
; src2 is read as a packed buffer with rows 16 bytes apart (offsets 0/8 and
; 16/24, pointer advanced by 32 per loop iteration).
;
; Row-count precondition: after the optional single-row peel for odd h, the
; count entering .loop must be a positive multiple of 2, because the loop is
; bottom-tested with sub 2 / jne.
;------------------------------------------------------------------------------
%macro PIXELS16_L2 1
%define OP op_%1
cglobal %1_pixels16_l2, 6,6
    movsxdifnidn r3, r3d            ; widen the int strides to pointer width
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    ; odd h: peel one row so the unrolled loop sees an even count
    mova         m0, [r1]
    mova         m1, [r1+8]
    pavgb        m0, [r2]
    pavgb        m1, [r2+8]
    add          r1, r4
    add          r2, 16
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    dec         r5d
.loop:
    ; row 0 of this iteration
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    pavgb        m0, [r2]
    pavgb        m1, [r2+8]
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    ; row 1 of this iteration
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    pavgb        m0, [r2+16]
    pavgb        m1, [r2+24]
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    add          r2, 32
    sub         r5d, 2
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS16_L2 put
PIXELS16_L2 avg