;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;* Copyright (c) 2017 Jokyo Images
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Vector constants defined outside this file and only referenced here.
; NOTE(review): going by their names these are presumably 0x80 replicated in
; every byte, and 512 / 1023 replicated in every 16-bit word; confirm against
; the file that defines them.
cextern pb_80
cextern pw_512
cextern pw_1023

SECTION .text

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; In place, for every row and every byte x of that row:
;     src_r[x] += src_g[x] - bias
;     src_b[x] += src_g[x] - bias
; where "bias" is the per-byte content of pb_80 (presumably 0x80, see its
; definition). All arithmetic wraps modulo 256 (psubb/paddb). src_g is only read.
;
; Requirements on the caller that follow from the code below:
;   * Rows are accessed with mova (x86inc's aligned move), at row start plus
;     multiples of mmsize, so the three base pointers and the three linesizes
;     must keep every row mmsize-aligned.
;   * The inner loop always handles whole vectors: each row is processed up to
;     width rounded up to a multiple of mmsize, so rows must be padded
;     accordingly.
;   * Both loops test their condition at the bottom; at least one vector of
;     at least one row is processed even if width or height is <= 0.
;
; Register roles:
;   m3                   bias vector loaded from pb_80
;   src_r/src_g/src_b    point at (row start + width); rows are indexed with x
;   w                    -width
;   x                    negative byte offset running from -width up to >= 0
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES 0
cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    movsxdifnidn    wq, wd              ; width is an int: sign-extend to pointer size if needed

    ; Point each plane at the end of its first row and index with a negative
    ; offset, so the inner loop can terminate on the flags of its own add.
    add         src_rq, wq
    add         src_gq, wq
    add         src_bq, wq
    neg             wq                  ; w = -width

%if ARCH_X86_64 == 0
    ; x86-32: only 7 GPRs are requested, so the 7th register is handed over
    ; from w to x. The negated width is written back to its stack argument
    ; slot and read from there; height is likewise used straight from its
    ; stack slot.
    mov             wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif

    mova            m3, [pb_80]         ; bias subtracted from every green sample

.nextrow:
    mov             xq, wq              ; restart at offset -width

.loop:
    mova            m0, [src_rq + xq]
    mova            m1, [src_gq + xq]
    mova            m2, [src_bq + xq]
    psubb           m1, m3              ; g - bias
    paddb           m0, m1              ; r += g - bias
    paddb           m2, m1              ; b += g - bias
    mova [src_rq + xq], m0
    mova [src_bq + xq], m2
    add             xq, mmsize
    jl .loop                            ; continue while the offset is still negative

    ; Advance all three planes to the next row.
    add         src_rq, linesize_rq
    add         src_gq, linesize_gq
    add         src_bq, linesize_bq
    sub             hd, 1
    jg .nextrow                         ; continue while rows remain
    REP_RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES
%endif

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; 16-bit-sample counterpart of restore_rgb_planes. In place, for every row and
; every sample x of that row:
;     src_r[x] = (src_r[x] + src_g[x] - bias) & mask
;     src_b[x] = (src_b[x] + src_g[x] - bias) & mask
; where "bias" and "mask" are the per-word contents of pw_512 and pw_1023
; (presumably 512 and 1023, i.e. a 10-bit mask; see their definitions).
; Word arithmetic wraps modulo 65536 (psubw/paddw) before masking.
; src_g is only read.
;
; width and all three linesizes are doubled on entry, i.e. they are taken as
; counts of 16-bit elements and converted to bytes here.
;
; Requirements on the caller that follow from the code below:
;   * Rows are accessed with mova (x86inc's aligned move), at row start plus
;     multiples of mmsize, so base pointers and byte linesizes must keep
;     every row mmsize-aligned.
;   * Each row is processed up to its byte width rounded up to a multiple of
;     mmsize, so rows must be padded accordingly.
;   * Both loops test their condition at the bottom; at least one vector of
;     at least one row is processed even if width or height is <= 0.
;
; Register roles:
;   m3                   bias vector loaded from pw_512
;   m4                   mask vector loaded from pw_1023
;   src_r/src_g/src_b    point at (row start + width in bytes)
;   w                    -(width in bytes)
;   x                    negative byte offset running from -w up to >= 0
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES10 0
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    ; Elements -> bytes. The 32-bit shift of wd also zeroes the upper half of
    ; wq on x86-64, so wq is usable as a full-width offset afterwards.
    shl             wd, 1
    shl    linesize_rq, 1
    shl    linesize_gq, 1
    shl    linesize_bq, 1

    ; Point each plane at the end of its first row and index with a negative
    ; offset, so the inner loop can terminate on the flags of its own add.
    add         src_rq, wq
    add         src_gq, wq
    add         src_bq, wq

    mova            m3, [pw_512]        ; bias subtracted from every green sample
    mova            m4, [pw_1023]       ; mask applied to every result
    neg             wq                  ; w = -(width in bytes)

%if ARCH_X86_64 == 0
    ; x86-32: only 7 GPRs are requested, so the 7th register is handed over
    ; from w to x. The negated width is written back to its stack argument
    ; slot and read from there; height is likewise used straight from its
    ; stack slot.
    mov             wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif

.nextrow:
    mov             xq, wq              ; restart at offset -(width in bytes)

.loop:
    mova            m0, [src_rq + xq]
    mova            m1, [src_gq + xq]
    mova            m2, [src_bq + xq]
    psubw           m1, m3              ; g - bias
    paddw           m0, m1              ; r += g - bias
    paddw           m2, m1              ; b += g - bias
    pand            m0, m4              ; keep only the bits set in the mask
    pand            m2, m4
    mova [src_rq + xq], m0
    mova [src_bq + xq], m2
    add             xq, mmsize
    jl .loop                            ; continue while the offset is still negative

    ; Advance all three planes to the next row (linesizes are in bytes now).
    add         src_rq, linesize_rq
    add         src_gq, linesize_gq
    add         src_bq, linesize_bq
    sub             hd, 1
    jg .nextrow                         ; continue while rows remain
    REP_RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES10

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES10
%endif