;******************************************************************************
;* X86 Optimized functions for Open Exr Decoder
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
;*
;* reorder_pixels, predictor based on patch by John Loy
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
;*
;* predictor AVX/AVX2 by Henrik Gramner
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

; Byte constants defined outside this file.
; NOTE(review): judging by their names, pb_15 holds sixteen/thirty-two bytes of
; value 15 and pb_80 bytes of value 0x80 - confirm against their definition.
cextern pb_15
cextern pb_80

SECTION .text

;------------------------------------------------------------------------------
; void ff_reorder_pixels(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
;
; Interleaves the two halves of src into dst, byte by byte:
;     dst[2*i]     = src[i]
;     dst[2*i + 1] = src[size/2 + i]
;
; All three pointers are biased to the end of the region they walk, and a
; single negative index (sizeq = -size/2) counts up towards zero; dst is
; addressed with twice that index because it advances twice as fast.
;
; The loop is bottom-tested and consumes mmsize bytes of each half per
; iteration, so it always runs at least once and only stops after a whole
; number of mmsize-byte chunks.
; NOTE(review): presumably the caller guarantees an even, positive size and
; buffers padded to that chunk granularity - confirm against the C caller.
; NOTE(review): the first half is loaded with mova and every store uses mova
; (x86inc's aligned move), while the second half uses movu; dst and src are
; therefore presumably required to be suitably aligned - confirm.
;------------------------------------------------------------------------------

%macro REORDER_PIXELS 0
cglobal reorder_pixels, 3,4,3, dst, src1, size, src2
    lea                              src2q, [src1q+sizeq] ; src2 = src + 2 * half_size
    add                               dstq, sizeq         ; dst offset by size
    shr                              sizeq, 1             ; half_size
    add                              src1q, sizeq         ; offset src by half_size
    neg                              sizeq                ; size = offset for dst, src1, src2
.loop:

    mova                                m0, [src1q+sizeq]        ; load first part
    movu                                m1, [src2q+sizeq]        ; load second part
    SBUTTERFLY bw, 0, 1, 2                                       ; interleaved (m2 is scratch)
    mova                 [dstq+2*sizeq   ], xm0                  ; copy to dst
    mova                 [dstq+2*sizeq+16], xm1
%if cpuflag(avx2)
    ; The two stores above only wrote the low 128 bits of m0 and m1; gather
    ; the two high 128-bit halves into m0 and write them as the next 32 bytes.
    vperm2i128                          m0, m0, m1, q0301
    mova                 [dstq+2*sizeq+32], m0
%endif
    add                              sizeq, mmsize
    jl .loop                                                     ; until the index reaches zero
    RET
%endmacro

INIT_XMM sse2
REORDER_PIXELS

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
REORDER_PIXELS
%endif


;------------------------------------------------------------------------------
; void ff_predictor(uint8_t *src, ptrdiff_t size);
;
; In-place running byte sum over src. Every input byte is XORed with [pb_80],
; and each output byte is the wrapping (mod 256) sum of all such bytes up to
; and including its position, plus an initial carry taken from [pb_80].
; NOTE(review): assuming pb_80 is all 0x80 bytes, this amounts to
;     src[0] unchanged, src[i] = src[i-1] + src[i] - 128   (mod 256)
; - confirm against the C reference implementation.
;
; Register roles:
;     m0  = [pb_80], XOR mask
;     xm1 = [pb_15], pshufb control used to turn xm2 into the next carry
;           (presumably broadcasts byte 15, the last output byte - see the
;           note on the cextern declarations)
;     xm2 = carry added to every byte of the next 16-byte group
;     m3  = data / partial prefix sums,  m4 = shifted copy of m3
;
; srcq is biased to the end of the buffer and sizeq is a negative index that
; counts up to zero. The loop is bottom-tested and handles mmsize bytes per
; iteration, using mova for both the load operand path of the store and the
; stores themselves.
; NOTE(review): presumably the caller guarantees size > 0, padding to a
; multiple of mmsize, and an aligned src - confirm.
;------------------------------------------------------------------------------

%macro PREDICTOR 0
cglobal predictor, 2,2,5, src, size
    mova             m0, [pb_80]
    mova            xm1, [pb_15]
    mova            xm2, xm0                    ; initial carry
    add            srcq, sizeq
    neg           sizeq
.loop:
    pxor             m3, m0, [srcq + sizeq]
    ; Log-step prefix sum inside each 128-bit lane: add copies of the data
    ; shifted up by 1, 2, 4 and finally 8 bytes.
    pslldq           m4, m3, 1
    paddb            m3, m4
    pslldq           m4, m3, 2
    paddb            m3, m4
    pslldq           m4, m3, 4
    paddb            m3, m4
    pslldq           m4, m3, 8
%if mmsize == 32
    paddb            m3, m4                     ; finish the per-lane prefix sums
    paddb           xm2, xm3                    ; low lane + incoming carry
    vextracti128    xm4, m3, 1                  ; high lane prefix sums
    mova [srcq + sizeq], xm2
    pshufb          xm2, xm1                    ; carry out of the low lane
    paddb           xm2, xm4                    ; high lane + that carry
    mova [srcq + sizeq + 16], xm2
%else
    paddb            m2, m3                     ; carry + partial sums
    paddb            m2, m4                     ; + 8-byte step completes the prefix sum
    mova [srcq + sizeq], m2
%endif
    pshufb          xm2, xm1                    ; carry for the next iteration
    add           sizeq, mmsize
    jl .loop
    RET
%endmacro

INIT_XMM ssse3
PREDICTOR

INIT_XMM avx
PREDICTOR

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
PREDICTOR
%endif