1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * SIMD-optimized pixel operations 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 22cabdff1aSopenharmony_ci#include "libavcodec/pixblockdsp.h" 23cabdff1aSopenharmony_ci#include "asm.h" 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_cistatic void get_pixels_mvi(int16_t *restrict block, 26cabdff1aSopenharmony_ci const uint8_t *restrict pixels, ptrdiff_t stride) 27cabdff1aSopenharmony_ci{ 28cabdff1aSopenharmony_ci int h = 8; 29cabdff1aSopenharmony_ci 30cabdff1aSopenharmony_ci do { 31cabdff1aSopenharmony_ci uint64_t p; 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_ci p = ldq(pixels); 34cabdff1aSopenharmony_ci stq(unpkbw(p), block); 35cabdff1aSopenharmony_ci stq(unpkbw(p >> 32), block + 4); 36cabdff1aSopenharmony_ci 37cabdff1aSopenharmony_ci pixels += stride; 38cabdff1aSopenharmony_ci block += 8; 39cabdff1aSopenharmony_ci } while (--h); 40cabdff1aSopenharmony_ci} 41cabdff1aSopenharmony_ci 42cabdff1aSopenharmony_cistatic void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, 43cabdff1aSopenharmony_ci ptrdiff_t stride) 44cabdff1aSopenharmony_ci{ 45cabdff1aSopenharmony_ci int h = 8; 46cabdff1aSopenharmony_ci uint64_t mask = 0x4040; 47cabdff1aSopenharmony_ci 48cabdff1aSopenharmony_ci mask |= mask << 16; 49cabdff1aSopenharmony_ci mask |= mask << 32; 50cabdff1aSopenharmony_ci do { 51cabdff1aSopenharmony_ci uint64_t x, y, c, d, a; 52cabdff1aSopenharmony_ci uint64_t signs; 53cabdff1aSopenharmony_ci 54cabdff1aSopenharmony_ci x = ldq(s1); 55cabdff1aSopenharmony_ci y = ldq(s2); 56cabdff1aSopenharmony_ci c = cmpbge(x, y); 57cabdff1aSopenharmony_ci d = x - y; 58cabdff1aSopenharmony_ci a = zap(mask, c); /* We use 0x4040404040404040 here... */ 59cabdff1aSopenharmony_ci d += 4 * a; /* ...so we can use s4addq here. */ 60cabdff1aSopenharmony_ci signs = zap(-1, c); 61cabdff1aSopenharmony_ci 62cabdff1aSopenharmony_ci stq(unpkbw(d) | (unpkbw(signs) << 8), block); 63cabdff1aSopenharmony_ci stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4); 64cabdff1aSopenharmony_ci 65cabdff1aSopenharmony_ci s1 += stride; 66cabdff1aSopenharmony_ci s2 += stride; 67cabdff1aSopenharmony_ci block += 8; 68cabdff1aSopenharmony_ci } while (--h); 69cabdff1aSopenharmony_ci} 70cabdff1aSopenharmony_ci 71cabdff1aSopenharmony_ciav_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx, 72cabdff1aSopenharmony_ci unsigned high_bit_depth) 73cabdff1aSopenharmony_ci{ 74cabdff1aSopenharmony_ci if (amask(AMASK_MVI) == 0) { 75cabdff1aSopenharmony_ci if (!high_bit_depth) 76cabdff1aSopenharmony_ci c->get_pixels = get_pixels_mvi; 77cabdff1aSopenharmony_ci c->diff_pixels = diff_pixels_mvi; 78cabdff1aSopenharmony_ci } 79cabdff1aSopenharmony_ci} 80