;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;* Copyright (c) 2017 Jokyo Images
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Vector constants defined in another object file.
; NOTE(review): going by their names these are presumably 0x80 in every byte,
; 512 in every word and 1023 in every word -- confirm against the definitions.
cextern pb_80
cextern pw_512
cextern pw_1023

SECTION .text

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; In place, for every row and every byte lane:
;     r = r + (g - c)
;     b = b + (g - c)
; using wrapping byte arithmetic, where c is the matching lane of pb_80.
; The g plane is only read.
;
; Each row is walked in whole mmsize-byte vectors, so a width that is not a
; multiple of mmsize is processed up to the next multiple.
; NOTE(review): callers presumably provide suitably aligned, padded rows -- confirm.
; The row loop tests height after the first row, so at least one row is always
; processed.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES 0
cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    movsxdifnidn     wq, wd         ; widen the int width to a full register where needed
    ; Point each plane at the end of its first row and negate the width, so the
    ; column index can count from -width up to zero.
    add          src_rq, wq
    add          src_gq, wq
    add          src_bq, wq
    neg              wq
%if ARCH_X86_64 == 0
    ; 32-bit build: only seven registers are requested above. Store the negated
    ; width back into its argument slot, hand the seventh register to x, and
    ; access width/height through their memory operands from here on.
    mov              wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
    mova             m3, [pb_80]    ; bias subtracted from every g byte
.nextrow:
    mov              xq, wq         ; x = -width

    .loop:
        mova             m0, [src_rq + xq]
        mova             m1, [src_gq + xq]
        mova             m2, [src_bq + xq]
        psubb            m1, m3     ; g - bias
        paddb            m0, m1     ; r += g - bias
        paddb            m2, m1     ; b += g - bias
        mova    [src_rq+xq], m0
        mova    [src_bq+xq], m2
        add              xq, mmsize
    jl .loop                        ; keep going while x is still negative

    ; Step all three planes to the next row.
    add          src_rq, linesize_rq
    add          src_gq, linesize_gq
    add          src_bq, linesize_bq
    sub              hd, 1
    jg .nextrow                     ; repeat while rows remain
    REP_RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES
%endif

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
;                           ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                           int width, int height)
;
; 16-bit-sample variant. In place, for every row and every word lane:
;     r = (r + (g - c)) & mask
;     b = (b + (g - c)) & mask
; where c is the matching lane of pw_512 and mask the matching lane of pw_1023.
; The g plane is only read.
;
; width and all three linesizes are doubled on entry before being used as byte
; offsets.
; NOTE(review): this implies they are passed in units of 16-bit samples -- confirm
; against the caller.
;
; Each row is walked in whole mmsize-byte vectors, so a row is processed up to
; the next multiple of mmsize bytes.
; NOTE(review): callers presumably provide suitably aligned, padded rows -- confirm.
; The row loop tests height after the first row, so at least one row is always
; processed.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES10 0
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    ; Scale width and strides by two (sample count -> byte count).
    shl              wd, 1
    shl     linesize_rq, 1
    shl     linesize_gq, 1
    shl     linesize_bq, 1
    ; Point each plane at the end of its first row; the column index then counts
    ; from -width (in bytes) up to zero.
    add          src_rq, wq
    add          src_gq, wq
    add          src_bq, wq
    mova             m3, [pw_512]   ; bias subtracted from every g word
    mova             m4, [pw_1023]  ; mask applied to the r and b results
    neg              wq
%if ARCH_X86_64 == 0
    ; 32-bit build: only seven registers are requested above. Store the negated
    ; width back into its argument slot, hand the seventh register to x, and
    ; access width/height through their memory operands from here on.
    mov              wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
.nextrow:
    mov              xq, wq         ; x = -width in bytes

    .loop:
        mova             m0, [src_rq + xq]
        mova             m1, [src_gq + xq]
        mova             m2, [src_bq + xq]
        psubw            m1, m3     ; g - bias
        paddw            m0, m1     ; r += g - bias
        paddw            m2, m1     ; b += g - bias
        pand             m0, m4     ; mask r
        pand             m2, m4     ; mask b
        mova    [src_rq+xq], m0
        mova    [src_bq+xq], m2
        add              xq, mmsize
    jl .loop                        ; keep going while x is still negative

    ; Step all three planes to the next row.
    add          src_rq, linesize_rq
    add          src_gq, linesize_gq
    add          src_bq, linesize_bq
    sub              hd, 1
    jg .nextrow                     ; repeat while rows remain
    REP_RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES10

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES10
%endif