/*
 * Copyright (c) 2020 Martin Storsjo
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/aarch64/asm.S"
22cabdff1aSopenharmony_ci
// void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2,
//                               uint8_t *dest, int width, int height,
//                               int src1Stride, int src2Stride, int dstStride);
//
// Interleaves two byte planes row by row:
//     dest[2 * i] = src1[i], dest[2 * i + 1] = src2[i]   for 0 <= i < width
// and repeats this for height rows, stepping each pointer by its stride.
// Returns immediately, touching no memory, when width <= 0 or height <= 0.
//
// In (AAPCS64):
//   x0 = src1        x1 = src2        x2 = dest
//   w3 = width       w4 = height
//   w5 = src1Stride  w6 = src2Stride  w7 = dstStride
// Clobbers: x0-x2, w4-w10, v0, v1, condition flags.
function ff_interleave_bytes_neon, export=1
        // Nothing to do for an empty image. Without this check a height of 0
        // (or less) would make the row counter at 9: wrap below zero and the
        // loop would keep processing rows; a negative width would be treated
        // as a large unsigned count by the masks below.
        cmp             w4,  #0
        b.le            0f
        cmp             w3,  #0
        b.le            0f

        // The loads/stores below post-increment the pointers by exactly one
        // row of data (width bytes per source, 2 * width bytes for dest), so
        // turn each stride into the distance from the end of one row to the
        // start of the next.
        sub             w5,  w5,  w3
        sub             w6,  w6,  w3
        sub             w7,  w7,  w3, lsl #1
1:
        // Start of a row: w8 = number of bytes handled in 16 byte chunks.
        ands            w8,  w3,  #0xfffffff0 // & ~15
        b.eq            3f
2:
        // 16 bytes from each source -> 32 interleaved bytes (st2 interleaves
        // the two registers element by element).
        ld1             {v0.16b}, [x0], #16
        ld1             {v1.16b}, [x1], #16
        subs            w8,  w8,  #16
        st2             {v0.16b, v1.16b}, [x2], #32
        b.gt            2b

        // Skip the tail handling entirely if width is a multiple of 16.
        tst             w3,  #15
        b.eq            9f

3:
        // Tail: the low four bits of width select an 8 byte step, a 4 byte
        // step and finally 1-3 single bytes.
        tst             w3,  #8
        b.eq            4f
        ld1             {v0.8b}, [x0], #8
        ld1             {v1.8b}, [x1], #8
        st2             {v0.8b, v1.8b}, [x2], #16
4:
        tst             w3,  #4
        b.eq            5f

        // Only lane 0 (4 bytes) of each register is loaded; zip1 on .8b
        // interleaves just those low 4 bytes of v0 and v1 into 8 bytes.
        ld1             {v0.s}[0], [x0], #4
        ld1             {v1.s}[0], [x1], #4
        zip1            v0.8b,   v0.8b,   v1.8b
        st1             {v0.8b}, [x2], #8

5:
        // w8 = remaining 0-3 bytes of this row.
        ands            w8,  w3,  #3
        b.eq            9f
6:
        // One byte from each source, packed as src1 | (src2 << 8) and
        // written with a single halfword store (little-endian byte order).
        ldrb            w9,  [x0], #1
        ldrb            w10, [x1], #1
        subs            w8,  w8,  #1
        bfi             w9,  w10, #8,  #8
        strh            w9,  [x2], #2
        b.gt            6b

9:
        // End of row: done after the last row, otherwise advance all three
        // pointers to the next row using the sign-extended adjusted strides.
        subs            w4,  w4,  #1
        b.eq            0f
        add             x0,  x0,  w5, sxtw
        add             x1,  x1,  w6, sxtw
        add             x2,  x2,  w7, sxtw
        b               1b

0:
        ret
endfunc
80