;*****************************************************************************
;* SIMD-optimized pixel operations
;*****************************************************************************
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

; get_pixels: widen an 8x8 block of bytes to 16-bit words.
;
; Arguments (x86inc register numbering):
;   r0 - destination; receives 8 rows of 8 words (0x80 bytes in total).
;        Written with mova, so it has to be mmsize (16-byte) aligned.
;   r1 - source bytes; 8 bytes are read from each of 8 rows.
;   r2 - distance in bytes between consecutive source rows.
;
; cglobal arguments: 3 parameters, 4 general-purpose registers (r3 is
; scratch), 5 vector registers (m0-m3 data, m4 zero).
INIT_XMM sse2
cglobal get_pixels, 3, 4, 5
    lea          r3, [r2*3]         ; r3 = 3 * stride, addresses row 3 / row 7
    pxor         m4, m4             ; m4 = 0, supplies the high byte of each word

    ; rows 0-3
    movh         m0, [r1]
    movh         m1, [r1+r2]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    lea          r1, [r1+r2*4]      ; advance the source pointer to row 4
    punpcklbw    m0, m4             ; interleave with zero: bytes -> words
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova         [r0], m0
    mova         [r0+0x10], m1
    mova         [r0+0x20], m2
    mova         [r0+0x30], m3

    ; rows 4-7
    movh         m0, [r1]
    movh         m1, [r1+r2*1]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova         [r0+0x40], m0
    mova         [r0+0x50], m1
    mova         [r0+0x60], m2
    mova         [r0+0x70], m3
    RET

; void ff_diff_pixels(int16_t *block, const uint8_t *s1,
;                     const uint8_t *s2, ptrdiff_t stride);
;
; diff_pixels: per-pixel difference of two 8x8 byte blocks, as 16-bit words.
;
; Arguments (x86inc register numbering):
;   r0 - block; receives 8 rows of 8 words (128 bytes in total), each word
;        being s1[x] - s2[x]. Written with mova, so it has to be mmsize
;        (16-byte) aligned.
;   r1 - s1, minuend bytes; 8 bytes are read per row.
;   r2 - s2, subtrahend bytes; 8 bytes are read per row.
;   r3 - stride; byte distance between rows, applied to both s1 and s2.
;
; cglobal arguments: 4 parameters, 5 general-purpose registers (r4 is the
; loop offset), 5 vector registers (m0-m3 data, m4 zero).
INIT_XMM sse2
cglobal diff_pixels, 4,5,5
    pxor         m4, m4             ; m4 = 0, supplies the high byte of each word
    ; Point r0 at the end of the output and run r4 from -128 up to 0, so the
    ; add that advances the offset also produces the loop-exit flag.
    add          r0,  128
    mov          r4, -128
.loop:
    ; two rows per iteration: m0/m1 from s1, m2/m3 from s2
    movq         m0, [r1]
    movq         m2, [r2]
    movq         m1, [r1+r3]
    movq         m3, [r2+r3]
    punpcklbw    m0, m4             ; interleave with zero: bytes -> words
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    psubw        m0, m2             ; s1 - s2, first row of the pair
    psubw        m1, m3             ; s1 - s2, second row of the pair
    mova         [r0+r4+0], m0
    mova         [r0+r4+mmsize], m1
    lea          r1, [r1+r3*2]      ; lea leaves the flags untouched
    lea          r2, [r2+r3*2]
    add          r4, 2 * mmsize     ; 32 output bytes done; ZF set when r4 hits 0
    jne .loop
    RET