1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * SIMD-optimized pixel operations
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
22cabdff1aSopenharmony_ci#include "libavcodec/pixblockdsp.h"
23cabdff1aSopenharmony_ci#include "asm.h"
24cabdff1aSopenharmony_ci
/**
 * Read an 8x8 block of bytes and store it as 64 consecutive int16_t values.
 *
 * Each source row is fetched as a single 64-bit word with ldq(). The low and
 * the high 32 bits of that word are each passed through unpkbw() and written
 * with stq() to the first and second group of four int16_t of the output row.
 * NOTE(review): unpkbw() is presumably the MVI byte-to-word unpack (zero
 * extension of four bytes to four 16-bit words) -- confirm against asm.h.
 *
 * @param block  destination, advanced by 8 int16_t per row (64 in total)
 * @param pixels source, 8 bytes are read per row via one ldq()
 * @param stride offset in bytes added to pixels after every row
 */
static void get_pixels_mvi(int16_t *restrict block,
                           const uint8_t *restrict pixels, ptrdiff_t stride)
{
    int row;

    for (row = 0; row < 8; row++) {
        const uint64_t line = ldq(pixels);

        /* Low four bytes go to block[0..3], high four to block[4..7]. */
        stq(unpkbw(line),       block);
        stq(unpkbw(line >> 32), block + 4);

        block  += 8;
        pixels += stride;
    }
}
41cabdff1aSopenharmony_ci
/**
 * Compute the per-byte difference s1 - s2 of two 8x8 byte blocks and store
 * the result as 64 consecutive int16_t values.
 *
 * Eight bytes per row are handled at once inside a 64-bit word:
 *  - the rows are subtracted as plain 64-bit integers, so every byte lane
 *    with s1 < s2 borrows one from the lane above it;
 *  - that borrow is undone by adding 0x100 (4 * 0x40) at each such lane,
 *    which carries +1 into the lane above;
 *  - a 0xff byte is placed above each such lane after unpacking, turning
 *    the low byte of the difference into a negative 16-bit value.
 * NOTE(review): this reading assumes cmpbge() yields one bit per byte lane
 * with s1 >= s2 and zap() clears the byte lanes selected by that bit mask,
 * as the Alpha instructions of the same names do -- confirm against asm.h.
 *
 * @param block  destination, advanced by 8 int16_t per row (64 in total)
 * @param s1     minuend rows, 8 bytes are read per row via one ldq()
 * @param s2     subtrahend rows, 8 bytes are read per row via one ldq()
 * @param stride offset in bytes added to both s1 and s2 after every row
 */
static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                            ptrdiff_t stride)
{
    /* 0x40 in every byte lane; multiplied by 4 below to give 0x100. */
    const uint64_t lane_0x40 = 0x4040404040404040ULL;
    int row;

    for (row = 8; row > 0; row--) {
        const uint64_t lhs    = ldq(s1);
        const uint64_t rhs    = ldq(s2);
        const uint64_t ge     = cmpbge(lhs, rhs);
        /* 0xff in the lanes not cleared by zap(), 0x00 elsewhere. */
        const uint64_t neg    = zap(-1, ge);
        /* 0x40 in the lanes not cleared by zap(), 0x00 elsewhere. */
        const uint64_t borrow = zap(lane_0x40, ge);
        uint64_t diff         = lhs - rhs;
        uint64_t lo, hi;

        /* Scaling by 4 (rather than masking with 0x100 lanes, which would
         * not fit in a byte) lets the compiler use s4addq here. */
        diff += 4 * borrow;

        lo = unpkbw(diff)       | (unpkbw(neg)       << 8);
        hi = unpkbw(diff >> 32) | (unpkbw(neg >> 32) << 8);

        stq(lo, block);
        stq(hi, block + 4);

        block += 8;
        s1    += stride;
        s2    += stride;
    }
}
70cabdff1aSopenharmony_ci
71cabdff1aSopenharmony_ciav_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
72cabdff1aSopenharmony_ci                                       unsigned high_bit_depth)
73cabdff1aSopenharmony_ci{
74cabdff1aSopenharmony_ci    if (amask(AMASK_MVI) == 0) {
75cabdff1aSopenharmony_ci        if (!high_bit_depth)
76cabdff1aSopenharmony_ci            c->get_pixels = get_pixels_mvi;
77cabdff1aSopenharmony_ci        c->diff_pixels = diff_pixels_mvi;
78cabdff1aSopenharmony_ci    }
79cabdff1aSopenharmony_ci}
80