1cabdff1aSopenharmony_ci;******************************************************************************
2cabdff1aSopenharmony_ci;* SIMD-optimized clear block functions
3cabdff1aSopenharmony_ci;* Copyright (c) 2002 Michael Niedermayer
4cabdff1aSopenharmony_ci;* Copyright (c) 2008 Loren Merritt
5cabdff1aSopenharmony_ci;* Copyright (c) 2009 Fiona Glaser
6cabdff1aSopenharmony_ci;*
7cabdff1aSopenharmony_ci;* AVX version by Jokyo Images
8cabdff1aSopenharmony_ci;*
9cabdff1aSopenharmony_ci;* This file is part of FFmpeg.
10cabdff1aSopenharmony_ci;*
11cabdff1aSopenharmony_ci;* FFmpeg is free software; you can redistribute it and/or
12cabdff1aSopenharmony_ci;* modify it under the terms of the GNU Lesser General Public
13cabdff1aSopenharmony_ci;* License as published by the Free Software Foundation; either
14cabdff1aSopenharmony_ci;* version 2.1 of the License, or (at your option) any later version.
15cabdff1aSopenharmony_ci;*
16cabdff1aSopenharmony_ci;* FFmpeg is distributed in the hope that it will be useful,
17cabdff1aSopenharmony_ci;* but WITHOUT ANY WARRANTY; without even the implied warranty of
18cabdff1aSopenharmony_ci;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19cabdff1aSopenharmony_ci;* Lesser General Public License for more details.
20cabdff1aSopenharmony_ci;*
21cabdff1aSopenharmony_ci;* You should have received a copy of the GNU Lesser General Public
22cabdff1aSopenharmony_ci;* License along with FFmpeg; if not, write to the Free Software
23cabdff1aSopenharmony_ci;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24cabdff1aSopenharmony_ci;******************************************************************************
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci%include "libavutil/x86/x86util.asm"
27cabdff1aSopenharmony_ci
28cabdff1aSopenharmony_ciSECTION .text
29cabdff1aSopenharmony_ci
;------------------------------------------------------------------------------
; void ff_clear_block(int16_t *blocks);
;
; Zero-fills 4 * %2 * mmsize bytes starting at blocks. Both instantiations
; below come out at 128 bytes, i.e. 64 int16_t values.
;
; The stores use mova (the aligned move), so blocks has to be aligned to
; mmsize.
;
; Macro arguments:
;   %1 = number of xmm registers used (handed to cglobal)
;   %2 = number of groups of four stores to emit
;------------------------------------------------------------------------------
%macro CLEAR_BLOCK 2
cglobal clear_block, 1, 1, %1, blocks
    ; m0 = 0; this is the only value ever written.
    ZERO  m0, m0, m0

    ; The stores are fully unrolled at assembly time, so there is no runtime
    ; loop. %%offset is the byte offset of the next store.
%assign %%offset 0
%rep 4*%2
    mova  [blocksq+%%offset], m0
%assign %%offset %%offset+mmsize
%endrep
    RET
%endmacro

; SSE build: 16-byte stores, two groups of four.
INIT_XMM sse
%define ZERO xorps
CLEAR_BLOCK 1, 2

; AVX build: 32-byte stores, one group of four. ZERO is still xorps here.
INIT_YMM avx
CLEAR_BLOCK 1, 1
54cabdff1aSopenharmony_ci
;------------------------------------------------------------------------------
; void ff_clear_blocks(int16_t *blocks);
;
; Zero-fills the 768 bytes (384 int16_t values) starting at blocks.
;
; blocksq is first advanced to the end of that region and lenq is set to
; -768, so [blocksq+lenq] starts at the original pointer. Each pass of the
; loop writes eight vectors (mmsize*8 bytes) and adds the same amount to
; lenq; the loop is left once lenq stops being negative. 768 is a multiple
; of mmsize*8 for both builds below, so lenq ends at exactly zero.
;
; The stores use mova (the aligned move), so blocks has to be aligned to
; mmsize.
;
; Macro arguments:
;   %1 = number of xmm registers used (handed to cglobal)
;------------------------------------------------------------------------------
%macro CLEAR_BLOCKS 1
cglobal clear_blocks, 1, 2, %1, blocks, len
    ; Total number of bytes cleared by one call.
%assign %%nbytes 768
    add   blocksq, %%nbytes
    mov      lenq, -%%nbytes
    ZERO       m0, m0, m0
.loop:
    ; Eight consecutive vector stores, generated at assembly time.
%assign %%slot 0
%rep 8
    mova  [blocksq+lenq+mmsize*%%slot], m0
%assign %%slot %%slot+1
%endrep
    add      lenq, mmsize*8
    ; Sign flag still set => lenq < 0 => more bytes left to clear.
    js      .loop
    RET
%endmacro

; SSE build: 128 bytes per pass, six passes.
INIT_XMM sse
%define ZERO xorps
CLEAR_BLOCKS 1

; AVX build: 256 bytes per pass, three passes. ZERO is still xorps here.
INIT_YMM avx
CLEAR_BLOCKS 1
83