;******************************************************************************
;* SIMD-optimized fullpel functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

; PAVGB_MMX mem, src, tmp, mask
;
; Byte-wise rounded-up average of %1 and %2 for CPUs without pavgb, using
;     avg = (a | b) - (((a ^ b) & mask) >> 1)
;   %1  first operand; it is read twice, so it must still hold the same value
;       at the pxor (the callers in this file pass a memory operand)
;   %2  register holding the second operand; names the result on exit
;   %3  scratch register (clobbered)
;   %4  register with 0xFE in every byte (see the setup in OP_PIXELS)
; Expands LOAD, so it is only usable where LOAD is %defined (OP_PIXELS does).
%macro PAVGB_MMX 4
    LOAD   %3, %1               ; tmp = a
    por    %3, %2               ; tmp = a | b
    pxor   %2, %1               ; src = a ^ b
    pand   %2, %4               ; drop bit 0 of each byte so the 64-bit shift
                                ; below cannot leak into the neighbouring byte
    psrlq  %2, 1                ; src = (a ^ b) >> 1, per byte
    psubb  %3, %2               ; tmp = (a | b) - ((a ^ b) >> 1)
    SWAP   %2, %3               ; assemble-time rename: %2 now names the result
%endmacro

; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
;                        ptrdiff_t line_size, int h)
;
; OP_PIXELS op, width
;   %1  "put" (copy pixels to block) or "avg" (average pixels into block)
;   %2  row width in bytes
; The address arithmetic below uses r0 as the destination (block), r1 as the
; source (pixels), r2 as the stride (line_size) and r3d as the row counter (h),
; matching the prototype above.
; Four rows are handled per loop iteration and the loop only terminates when
; the counter reaches exactly zero, so h must be a positive multiple of 4.
%macro OP_PIXELS 2
%if %2 == mmsize/2
; Half-register rows: one movh per row, and LEN is forced to mmsize so that the
; %rep below expands exactly once.
%define LOAD movh
%define SAVE movh
%define LEN  mmsize
%else
; Full-register rows: movu for loads, mova for stores.
; NOTE(review): mova is the aligned move in x86inc, so the destination rows are
; presumably expected to be mmsize-aligned -- confirm against the callers.
%define LOAD movu
%define SAVE mova
%define LEN  %2
%endif
; NOTE(review): 4 vector registers are declared, but the non-mmxext avg path
; below also uses m4-m6. In this file that path is only instantiated under
; INIT_MMX -- confirm before instantiating it for wider register sets.
cglobal %1_pixels%2, 4,5,4
    lea          r4, [r2*3]     ; r4 = 3 * line_size, offset of the fourth row
%ifidn %1, avg
%if notcpuflag(mmxext)
    ; Build the 0xFE-per-byte mask for PAVGB_MMX: all ones, then 0xFF + 0xFF
    ; wraps to 0xFE in every byte.
    pcmpeqd      m6, m6
    paddb        m6, m6
%endif
%endif
.loop:
%assign %%i 0
%rep LEN/mmsize                 ; one pass per mmsize-wide column of the row
    LOAD         m0, [r1 + %%i]
    LOAD         m1, [r1+r2 + %%i]
    LOAD         m2, [r1+r2*2 + %%i]
    LOAD         m3, [r1+r4 + %%i]
%ifidn %1, avg
%if notcpuflag(mmxext)
    ; No pavgb available: emulate it. m4/m5 are scratch, m6 is the mask.
    PAVGB_MMX    [r0 + %%i], m0, m4, m6
    PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
    PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
    PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
%else
    pavgb        m0, [r0 + %%i]
    pavgb        m1, [r0+r2 + %%i]
    pavgb        m2, [r0+r2*2 + %%i]
    pavgb        m3, [r0+r4 + %%i]
%endif
%endif
    SAVE       [r0 + %%i], m0
    SAVE    [r0+r2 + %%i], m1
    SAVE  [r0+r2*2 + %%i], m2
    SAVE    [r0+r4 + %%i], m3
%assign %%i %%i+mmsize
%endrep
    sub          r3d, 4         ; four rows done
    lea           r1, [r1+r2*4] ; lea leaves the flags untouched, so the jne
    lea           r0, [r0+r2*4] ; below still tests the result of the sub
    jne            .loop
    RET
%endmacro

INIT_MMX mmx
OP_PIXELS put, 4
OP_PIXELS put, 8
OP_PIXELS put, 16
OP_PIXELS avg, 16

INIT_MMX mmxext
OP_PIXELS avg, 4
OP_PIXELS avg, 8
OP_PIXELS avg, 16

INIT_XMM sse2
OP_PIXELS put, 16
OP_PIXELS avg, 16