;******************************************************************************
;* SIMD-optimized clear block functions
;* Copyright (c) 2002 Michael Niedermayer
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2009 Fiona Glaser
;*
;* AVX version by Jokyo Images
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;----------------------------------------
; void ff_clear_block(int16_t *blocks);
;----------------------------------------
; Fills the start of `blocks` with zeros using 4 * %2 full-width vector
; stores, i.e. mmsize * 4 * %2 bytes in total.
;
; %1 = number of xmm registers used
; %2 = number of groups of four stores to emit (fully unrolled, no
;      run-time loop)
;
; NOTE(review): mova is presumably x86inc's aligned move, in which case
; `blocks` must be aligned to mmsize - confirm against the callers.
%macro CLEAR_BLOCK 2
cglobal clear_block, 1, 1, %1, blocks
    ; XOR of a register with itself leaves it all-zero.
    ZERO          m0, m0, m0
    ; Emit one store per mmsize-byte slot: slots 0 .. 4*%2-1.
%assign %%slot 0
%rep 4*%2
    mova          [blocksq+mmsize*%%slot], m0
%assign %%slot %%slot+1
%endrep
    RET
%endmacro

; Instruction used by both macros in this file to zero m0.  Defined once;
; it stays in effect for every instantiation below.
%define ZERO xorps

; Assuming mmsize is 16 under INIT_XMM and 32 under INIT_YMM (x86inc
; convention - confirm), both variants write 128 bytes:
;   sse: 4*2 stores * 16 bytes      avx: 4*1 stores * 32 bytes
INIT_XMM sse
CLEAR_BLOCK 1, 2
INIT_YMM avx
CLEAR_BLOCK 1, 1

;-----------------------------------------
; void ff_clear_blocks(int16_t *blocks);
;-----------------------------------------
; Zeroes 768 bytes starting at `blocks` (presumably six 64-coefficient
; int16_t blocks - confirm against the caller).
;
; %1 = number of xmm registers used
;
; The pointer is moved to the end of the region and a negative byte offset
; counts up towards zero, so the `add` that advances the offset also sets
; the sign flag that controls the loop; no separate compare is needed.
%macro CLEAR_BLOCKS 1
cglobal clear_blocks, 1, 2, %1, blocks, len
    add           blocksq, 768          ; blocksq -> one past the region
    mov           lenq, -768            ; blocksq+lenq == original pointer
    ZERO          m0, m0, m0            ; m0 = 0
.loop:
    ; Eight consecutive vector stores: mmsize*8 bytes per iteration.
%assign %%slot 0
%rep 8
    mova          [blocksq+lenq+mmsize*%%slot], m0
%assign %%slot %%slot+1
%endrep
    add           lenq, mmsize*8
    js            .loop                 ; repeat while the offset is negative
    RET
%endmacro

INIT_XMM sse
CLEAR_BLOCKS 1
INIT_YMM avx
CLEAR_BLOCKS 1