1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2017 Google Inc.
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/arm/asm.S"
22cabdff1aSopenharmony_ci
23cabdff1aSopenharmony_ci@ All public functions in this file have the following signature:
24cabdff1aSopenharmony_ci@ typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
25cabdff1aSopenharmony_ci@                            const uint8_t *ref, ptrdiff_t ref_stride,
26cabdff1aSopenharmony_ci@                            int h, int mx, int my);
27cabdff1aSopenharmony_ci
/*
 * ff_vp9_copy128_neon
 *
 * Copies h rows of 128 bytes each from ref to dst, one row per loop
 * iteration.
 *
 * In (per the vp9_mc_func signature documented at the top of this file):
 *   r0   = dst         (accessed with a 128-bit alignment hint)
 *   r1   = dst_stride
 *   r2   = ref         (no alignment hint)
 *   r3   = ref_stride
 *   [sp] = h           (first stack argument; mx and my are not read)
 * Clobbers: r0-r3, r12, q0-q3, q8-q11, flags.
 *
 * The row counter is decremented before it is first tested, so h is
 * expected to be at least 1.
 */
28cabdff1aSopenharmony_cifunction ff_vp9_copy128_neon, export=1
29cabdff1aSopenharmony_ci        ldr             r12, [sp]
        /* Three of the four 32-byte accesses per row post-increment their
         * pointer by 32 bytes, so only (stride - 96) remains to be added by
         * the last access of the row. */
30cabdff1aSopenharmony_ci        sub             r1,  r1,  #96
31cabdff1aSopenharmony_ci        sub             r3,  r3,  #96
32cabdff1aSopenharmony_ci1:
        /* The flags set here are consumed by the bne at the bottom; none of
         * the NEON loads/stores in between modify them. */
33cabdff1aSopenharmony_ci        subs            r12, r12, #1
34cabdff1aSopenharmony_ci        vld1.16         {q0,  q1},  [r2]!
35cabdff1aSopenharmony_ci        vst1.16         {q0,  q1},  [r0, :128]!
36cabdff1aSopenharmony_ci        vld1.16         {q2,  q3},  [r2]!
37cabdff1aSopenharmony_ci        vst1.16         {q2,  q3},  [r0, :128]!
38cabdff1aSopenharmony_ci        vld1.16         {q8,  q9},  [r2]!
39cabdff1aSopenharmony_ci        vst1.16         {q8,  q9},  [r0, :128]!
        /* Last 32 bytes of the row: step both pointers to the next row. */
40cabdff1aSopenharmony_ci        vld1.16         {q10, q11}, [r2], r3
41cabdff1aSopenharmony_ci        vst1.16         {q10, q11}, [r0, :128], r1
42cabdff1aSopenharmony_ci        bne             1b
43cabdff1aSopenharmony_ci        bx              lr
44cabdff1aSopenharmony_ciendfunc
45cabdff1aSopenharmony_ci
/*
 * ff_vp9_avg64_16_neon
 *
 * For h rows of 128 bytes (64 unsigned 16-bit lanes) each, replaces dst with
 * the rounding average of dst and ref: dst = (dst + ref + 1) >> 1 per lane
 * (vrhadd.u16).
 *
 * In (per the vp9_mc_func signature documented at the top of this file):
 *   r0 = dst, r1 = dst_stride, r2 = ref, r3 = ref_stride,
 *   h  = first stack argument (at [sp, #4] once lr has been pushed).
 * Clobbers: r0-r3, r12, q0-q3, q8-q15, flags. lr is saved on entry and
 * the function returns by popping it into pc.
 *
 * dst is walked by two pointers: r0 reads the existing pixels and lr
 * writes the averaged result, trailing behind the reads. The row counter is
 * decremented before it is first tested, so h is expected to be at least 1.
 */
46cabdff1aSopenharmony_cifunction ff_vp9_avg64_16_neon, export=1
47cabdff1aSopenharmony_ci        push            {lr}
48cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]
        /* Three 32-byte post-increments per row leave (stride - 96) for the
         * last access of each row. */
49cabdff1aSopenharmony_ci        sub             r1,  r1,  #96
50cabdff1aSopenharmony_ci        sub             r3,  r3,  #96
        /* lr = dst write pointer (r0 stays the dst read pointer). */
51cabdff1aSopenharmony_ci        mov             lr,  r0
52cabdff1aSopenharmony_ci1:
53cabdff1aSopenharmony_ci        subs            r12, r12, #1
54cabdff1aSopenharmony_ci        vld1.16         {q8,  q9},  [r2]!
55cabdff1aSopenharmony_ci        vld1.16         {q0,  q1},  [r0, :128]!
56cabdff1aSopenharmony_ci        vld1.16         {q10, q11}, [r2]!
57cabdff1aSopenharmony_ci        vrhadd.u16      q0,  q0,  q8
58cabdff1aSopenharmony_ci        vld1.16         {q2,  q3},  [r0, :128]!
59cabdff1aSopenharmony_ci        vrhadd.u16      q1,  q1,  q9
60cabdff1aSopenharmony_ci        vld1.16         {q12, q13}, [r2]!
61cabdff1aSopenharmony_ci        vrhadd.u16      q2,  q2,  q10
62cabdff1aSopenharmony_ci        vst1.16         {q0,  q1},  [lr, :128]!
63cabdff1aSopenharmony_ci        vrhadd.u16      q3,  q3,  q11
        /* q8/q9 held ref data that has already been consumed above; they are
         * reused here for the third 32-byte chunk of dst. */
64cabdff1aSopenharmony_ci        vld1.16         {q8,  q9},  [r0, :128]!
65cabdff1aSopenharmony_ci        vst1.16         {q2,  q3},  [lr, :128]!
66cabdff1aSopenharmony_ci        vrhadd.u16      q8,  q8,  q12
67cabdff1aSopenharmony_ci        vld1.16         {q14, q15}, [r2], r3
68cabdff1aSopenharmony_ci        vrhadd.u16      q9,  q9,  q13
69cabdff1aSopenharmony_ci        vld1.16         {q10, q11}, [r0, :128], r1
70cabdff1aSopenharmony_ci        vrhadd.u16      q10, q10, q14
71cabdff1aSopenharmony_ci        vst1.16         {q8,  q9},  [lr, :128]!
72cabdff1aSopenharmony_ci        vrhadd.u16      q11, q11, q15
73cabdff1aSopenharmony_ci        vst1.16         {q10, q11}, [lr, :128], r1
74cabdff1aSopenharmony_ci        bne             1b
75cabdff1aSopenharmony_ci        pop             {pc}
76cabdff1aSopenharmony_ciendfunc
77cabdff1aSopenharmony_ci
/*
 * ff_vp9_avg32_16_neon
 *
 * For h rows of 64 bytes (32 unsigned 16-bit lanes) each, replaces dst with
 * the rounding average of dst and ref: dst = (dst + ref + 1) >> 1 per lane
 * (vrhadd.u16).
 *
 * In (per the vp9_mc_func signature documented at the top of this file):
 *   r0 = dst, r1 = dst_stride, r2 = ref, r3 = ref_stride,
 *   h  = first stack argument (at [sp, #4] once lr has been pushed).
 * Clobbers: r0-r3, r12, q0-q3, q8-q11, flags. lr is saved on entry and
 * the function returns by popping it into pc.
 *
 * r0 reads the existing dst pixels while lr writes the result. The row
 * counter is decremented before it is first tested, so h is expected to be
 * at least 1.
 */
78cabdff1aSopenharmony_cifunction ff_vp9_avg32_16_neon, export=1
79cabdff1aSopenharmony_ci        push            {lr}
80cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]
        /* One 32-byte post-increment per row leaves (stride - 32) for the
         * second access of each row. */
81cabdff1aSopenharmony_ci        sub             r1,  r1,  #32
82cabdff1aSopenharmony_ci        sub             r3,  r3,  #32
        /* lr = dst write pointer (r0 stays the dst read pointer). */
83cabdff1aSopenharmony_ci        mov             lr,  r0
84cabdff1aSopenharmony_ci1:
85cabdff1aSopenharmony_ci        subs            r12, r12, #1
86cabdff1aSopenharmony_ci        vld1.16         {q8,  q9},  [r2]!
87cabdff1aSopenharmony_ci        vld1.16         {q0,  q1},  [r0, :128]!
88cabdff1aSopenharmony_ci        vld1.16         {q10, q11}, [r2], r3
89cabdff1aSopenharmony_ci        vrhadd.u16      q0,  q0,  q8
90cabdff1aSopenharmony_ci        vld1.16         {q2,  q3},  [r0, :128], r1
91cabdff1aSopenharmony_ci        vrhadd.u16      q1,  q1,  q9
92cabdff1aSopenharmony_ci        vrhadd.u16      q2,  q2,  q10
93cabdff1aSopenharmony_ci        vst1.16         {q0, q1},  [lr, :128]!
94cabdff1aSopenharmony_ci        vrhadd.u16      q3,  q3,  q11
95cabdff1aSopenharmony_ci        vst1.16         {q2, q3},  [lr, :128], r1
96cabdff1aSopenharmony_ci        bne             1b
97cabdff1aSopenharmony_ci        pop             {pc}
98cabdff1aSopenharmony_ciendfunc
99cabdff1aSopenharmony_ci
/*
 * ff_vp9_avg16_16_neon
 *
 * For h rows of 32 bytes (16 unsigned 16-bit lanes) each, replaces dst with
 * the rounding average of dst and ref: dst = (dst + ref + 1) >> 1 per lane
 * (vrhadd.u16).
 *
 * In (per the vp9_mc_func signature documented at the top of this file):
 *   r0 = dst, r1 = dst_stride, r2 = ref, r3 = ref_stride, [sp] = h.
 * Clobbers: r0, r2, r12, q0-q3, flags.
 *
 * A whole row fits in one load/store pair, so r0 alone serves as both the
 * read and the write pointer. The row counter is decremented before it is
 * first tested, so h is expected to be at least 1.
 */
100cabdff1aSopenharmony_cifunction ff_vp9_avg16_16_neon, export=1
101cabdff1aSopenharmony_ci        ldr             r12, [sp]
102cabdff1aSopenharmony_ci1:
103cabdff1aSopenharmony_ci        subs            r12, r12, #1
104cabdff1aSopenharmony_ci        vld1.16         {q2,  q3},  [r2], r3
        /* dst is read without writeback; r0 only advances on the store. */
105cabdff1aSopenharmony_ci        vld1.16         {q0,  q1},  [r0, :128]
106cabdff1aSopenharmony_ci        vrhadd.u16      q0,  q0,  q2
107cabdff1aSopenharmony_ci        vrhadd.u16      q1,  q1,  q3
108cabdff1aSopenharmony_ci        vst1.16         {q0,  q1},  [r0, :128], r1
109cabdff1aSopenharmony_ci        bne             1b
110cabdff1aSopenharmony_ci        bx              lr
111cabdff1aSopenharmony_ciendfunc
112cabdff1aSopenharmony_ci
/*
 * ff_vp9_avg8_16_neon
 *
 * For h rows of 16 bytes (8 unsigned 16-bit lanes) each, replaces dst with
 * the rounding average of dst and ref: dst = (dst + ref + 1) >> 1 per lane
 * (vrhadd.u16). Two rows are handled per loop iteration.
 *
 * In (per the vp9_mc_func signature documented at the top of this file):
 *   r0 = dst, r1 = dst_stride, r2 = ref, r3 = ref_stride,
 *   h  = first stack argument (at [sp, #4] once lr has been pushed).
 * Clobbers: r0, r2, r12, q0-q3, flags. lr is saved on entry and the
 * function returns by popping it into pc.
 *
 * The counter drops by 2 per iteration and the loop ends only when it
 * reaches exactly zero, so h is expected to be a nonzero multiple of 2.
 */
113cabdff1aSopenharmony_cifunction ff_vp9_avg8_16_neon, export=1
114cabdff1aSopenharmony_ci        push            {lr}
115cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]
        /* lr = dst write pointer (r0 stays the dst read pointer). */
116cabdff1aSopenharmony_ci        mov             lr,  r0
117cabdff1aSopenharmony_ci1:
118cabdff1aSopenharmony_ci        subs            r12, r12, #2
119cabdff1aSopenharmony_ci        vld1.16         {q2},  [r2], r3
120cabdff1aSopenharmony_ci        vld1.16         {q0},  [r0, :128], r1
121cabdff1aSopenharmony_ci        vld1.16         {q3},  [r2], r3
122cabdff1aSopenharmony_ci        vrhadd.u16      q0,  q0,  q2
123cabdff1aSopenharmony_ci        vld1.16         {q1},  [r0, :128], r1
124cabdff1aSopenharmony_ci        vrhadd.u16      q1,  q1,  q3
125cabdff1aSopenharmony_ci        vst1.16         {q0},  [lr, :128], r1
126cabdff1aSopenharmony_ci        vst1.16         {q1},  [lr, :128], r1
127cabdff1aSopenharmony_ci        bne             1b
128cabdff1aSopenharmony_ci        pop             {pc}
129cabdff1aSopenharmony_ciendfunc
130cabdff1aSopenharmony_ci
/*
 * ff_vp9_avg4_16_neon
 *
 * For h rows of 8 bytes (4 unsigned 16-bit lanes) each, replaces dst with
 * the rounding average of dst and ref: dst = (dst + ref + 1) >> 1 per lane
 * (vrhadd.u16). Two rows are handled per loop iteration.
 *
 * In (per the vp9_mc_func signature documented at the top of this file):
 *   r0 = dst (64-bit alignment hint), r1 = dst_stride, r2 = ref,
 *   r3 = ref_stride, [sp] = h.
 * Clobbers: r0, r2, r12, d0-d3, flags.
 *
 * The counter drops by 2 per iteration and the loop ends only when it
 * reaches exactly zero, so h is expected to be a nonzero multiple of 2.
 */
131cabdff1aSopenharmony_cifunction ff_vp9_avg4_16_neon, export=1
132cabdff1aSopenharmony_ci        ldr             r12, [sp]
133cabdff1aSopenharmony_ci1:
134cabdff1aSopenharmony_ci        subs            r12, r12, #2
135cabdff1aSopenharmony_ci        vld1.16         {d2},  [r2], r3
136cabdff1aSopenharmony_ci        vld1.16         {d0},  [r0, :64], r1
137cabdff1aSopenharmony_ci        vld1.16         {d3},  [r2], r3
138cabdff1aSopenharmony_ci        vrhadd.u16      d0,  d0,  d2
        /* The second dst row is read without writeback, then r0 is moved
         * back one stride so that both stores below can go through r0
         * instead of needing a separate write pointer. */
139cabdff1aSopenharmony_ci        vld1.16         {d1},  [r0, :64]
140cabdff1aSopenharmony_ci        sub             r0,  r0,  r1
141cabdff1aSopenharmony_ci        vrhadd.u16      d1,  d1,  d3
142cabdff1aSopenharmony_ci        vst1.16         {d0},  [r0, :64], r1
143cabdff1aSopenharmony_ci        vst1.16         {d1},  [r0, :64], r1
144cabdff1aSopenharmony_ci        bne             1b
145cabdff1aSopenharmony_ci        bx              lr
146cabdff1aSopenharmony_ciendfunc
147cabdff1aSopenharmony_ci
148cabdff1aSopenharmony_ci@ Helper macros for vmull/vmlal with a constant from either d0 or d1 depending on index
/*
 * vmull_lane dst, src, idx
 *
 * Emits a widening signed 16-bit multiply of src by filter coefficient
 * number idx, writing the products to dst (no accumulation).
 *   dst = q register receiving the 32-bit products
 *   src = d register holding four signed 16-bit values
 *   idx = coefficient index 0-7; it is evaluated by .if, so it must be an
 *         assemble-time constant
 * Coefficients 0-3 are taken from lanes 0-3 of d0 and coefficients 4-7 from
 * lanes 0-3 of d1, i.e. q0 is expected to hold the eight coefficients.
 */
149cabdff1aSopenharmony_ci.macro vmull_lane dst, src, idx
150cabdff1aSopenharmony_ci.if \idx < 4
151cabdff1aSopenharmony_ci       vmull.s16        \dst, \src, d0[\idx]
152cabdff1aSopenharmony_ci.else
153cabdff1aSopenharmony_ci       vmull.s16        \dst, \src, d1[\idx - 4]
154cabdff1aSopenharmony_ci.endif
155cabdff1aSopenharmony_ci.endm
/*
 * vmlal_lane dst, src, idx
 *
 * Emits a widening signed 16-bit multiply-accumulate: the products of src
 * and filter coefficient number idx are added to dst.
 *   dst = q register holding the 32-bit accumulators
 *   src = d register holding four signed 16-bit values
 *   idx = coefficient index 0-7; it is evaluated by .if, so it must be an
 *         assemble-time constant
 * Coefficients 0-3 are taken from lanes 0-3 of d0 and coefficients 4-7 from
 * lanes 0-3 of d1, i.e. q0 is expected to hold the eight coefficients.
 */
156cabdff1aSopenharmony_ci.macro vmlal_lane dst, src, idx
157cabdff1aSopenharmony_ci.if \idx < 4
158cabdff1aSopenharmony_ci       vmlal.s16        \dst, \src, d0[\idx]
159cabdff1aSopenharmony_ci.else
160cabdff1aSopenharmony_ci       vmlal.s16        \dst, \src, d1[\idx - 4]
161cabdff1aSopenharmony_ci.endif
162cabdff1aSopenharmony_ci.endm
163cabdff1aSopenharmony_ci
164cabdff1aSopenharmony_ci@ Extract a vector from src1-src2 and src3-src4, and multiply-accumulate
165cabdff1aSopenharmony_ci@ into dst1 and dst3 (or dst1-dst2 and dst3-dst4 for size >= 8)
/*
 * extmlal dst1, dst2, dst3, dst4, src1, src2, src3, src4, offset, size
 *
 * Applies one filter tap to two rows at once.
 *   dst1, dst2 = q accumulators for the first row (dst2 only if size >= 8)
 *   dst3, dst4 = q accumulators for the second row (dst4 only if size >= 8)
 *   src1, src2 = consecutive q registers holding the first row's input
 *   src3, src4 = consecutive q registers holding the second row's input
 *   offset     = tap number; it selects both the input shift (offset
 *                16-bit elements, i.e. 2*offset bytes) and the coefficient
 *   size       = block width; 4 processes four outputs per row, >= 8
 *                processes eight
 * Clobbers q14 and q15, which receive the shifted input windows.
 */
166cabdff1aSopenharmony_ci.macro extmlal dst1, dst2, dst3, dst4, src1, src2, src3, src4, offset, size
        /* q14/q15 = 16 bytes starting 2*offset bytes into src1:src2 and
         * src3:src4 respectively. */
167cabdff1aSopenharmony_ci        vext.8          q14, \src1, \src2, #(2*\offset)
168cabdff1aSopenharmony_ci        vext.8          q15, \src3, \src4, #(2*\offset)
169cabdff1aSopenharmony_ci        vmlal_lane      \dst1,  d28, \offset
170cabdff1aSopenharmony_ci        vmlal_lane      \dst3,  d30, \offset
171cabdff1aSopenharmony_ci.if \size >= 8
172cabdff1aSopenharmony_ci        vmlal_lane      \dst2,  d29, \offset
173cabdff1aSopenharmony_ci        vmlal_lane      \dst4,  d31, \offset
174cabdff1aSopenharmony_ci.endif
175cabdff1aSopenharmony_ci.endm
176cabdff1aSopenharmony_ci
177cabdff1aSopenharmony_ci
178cabdff1aSopenharmony_ci@ Instantiate a horizontal filter function for the given size.
179cabdff1aSopenharmony_ci@ This can work on 4 or 8 pixels in parallel; for larger
180cabdff1aSopenharmony_ci@ widths it will do 8 pixels at a time and loop horizontally.
181cabdff1aSopenharmony_ci@ The actual width (in bytes) is passed in r5, the height in r4 and
182cabdff1aSopenharmony_ci@ the filter coefficients in r12.
/*
 * do_8tap_h type, size
 *   type = put (store the filtered pixels) or avg (rounding-average them
 *          with the pixels already in dst before storing)
 *   size = 4 or 8; selects the four-wide or the eight-wide body
 *
 * The generated function <type>_8tap_<size>h is a shared tail rather than
 * a standalone entry point: it ends by popping r4-r7, which the
 * do_8tap_h_func wrappers push before branching here.
 *
 * Register use on entry:
 *   r0  = dst             r1  = dst stride
 *   r2  = src             r3  = src stride
 *   r4  = height          r5  = width in bytes (read only when size >= 8)
 *   r12 = pointer to eight 16-bit coefficients (128-bit aligned)
 *   q3  = per-lane maximum pixel value used for the final clamp
 * Internally r6/r7 point at the second dst/src row, since two rows are
 * filtered per vertical iteration, and q0 holds the coefficients.
 * Clobbers r0-r3, r12, q0-q2, q8-q15 and flags in addition to the above.
 *
 * The height counter drops by 2 per iteration and the loop ends only when
 * it reaches exactly zero, so the height is expected to be a nonzero
 * multiple of 2.
 */
183cabdff1aSopenharmony_ci.macro do_8tap_h type, size
184cabdff1aSopenharmony_cifunction \type\()_8tap_\size\()h
        /* Step src back by three 16-bit pixels: tap 0 is applied to the
         * pixel three positions to the left of each output pixel. */
185cabdff1aSopenharmony_ci        sub             r2,  r2,  #6
        /* r6/r7 = second row of dst/src; both strides are then doubled
         * because every iteration advances by two rows. */
186cabdff1aSopenharmony_ci        add             r6,  r0,  r1
187cabdff1aSopenharmony_ci        add             r7,  r2,  r3
188cabdff1aSopenharmony_ci        add             r1,  r1,  r1
189cabdff1aSopenharmony_ci        add             r3,  r3,  r3
190cabdff1aSopenharmony_ci        @ Only size >= 8 loops horizontally and needs
191cabdff1aSopenharmony_ci        @ reduced dst stride
192cabdff1aSopenharmony_ci.if \size >= 8
193cabdff1aSopenharmony_ci        sub             r1,  r1,  r5
194cabdff1aSopenharmony_ci.endif
195cabdff1aSopenharmony_ci        @ size >= 8 loads two qwords and increments r2,
196cabdff1aSopenharmony_ci        @ for size 4 it's enough with three dwords and no
197cabdff1aSopenharmony_ci        @ postincrement
198cabdff1aSopenharmony_ci.if \size >= 8
        /* Per row the src pointer advances by 32 bytes for the initial load
         * plus 16 bytes for each further 8-pixel step, i.e. width + 16 bytes
         * in total. */
199cabdff1aSopenharmony_ci        sub             r3,  r3,  r5
200cabdff1aSopenharmony_ci        sub             r3,  r3,  #16
201cabdff1aSopenharmony_ci.endif
202cabdff1aSopenharmony_ci        @ Load the filter vector
203cabdff1aSopenharmony_ci        vld1.16         {q0},  [r12,:128]
204cabdff1aSopenharmony_ci1:
205cabdff1aSopenharmony_ci.if \size >= 8
        /* r12 is free once the coefficients are loaded; it now counts the
         * bytes of output still to be produced in this pair of rows. */
206cabdff1aSopenharmony_ci        mov             r12, r5
207cabdff1aSopenharmony_ci.endif
208cabdff1aSopenharmony_ci        @ Load src
209cabdff1aSopenharmony_ci.if \size >= 8
210cabdff1aSopenharmony_ci        vld1.16         {q8,  q9},  [r2]!
211cabdff1aSopenharmony_ci        vld1.16         {q10, q11}, [r7]!
212cabdff1aSopenharmony_ci.else
213cabdff1aSopenharmony_ci        vld1.16         {d16, d17, d18}, [r2]
214cabdff1aSopenharmony_ci        vld1.16         {d20, d21, d22}, [r7]
215cabdff1aSopenharmony_ci.endif
216cabdff1aSopenharmony_ci2:
217cabdff1aSopenharmony_ci
        /* Tap 0 uses the unshifted input; taps 1-7 are accumulated by
         * extmlal on input shifted by one more pixel each time.
         * q1/q2 accumulate the first row, q12/q13 the second row. */
218cabdff1aSopenharmony_ci        vmull.s16       q1,  d16, d0[0]
219cabdff1aSopenharmony_ci        vmull.s16       q12, d20, d0[0]
220cabdff1aSopenharmony_ci.if \size >= 8
221cabdff1aSopenharmony_ci        vmull.s16       q2,  d17, d0[0]
222cabdff1aSopenharmony_ci        vmull.s16       q13, d21, d0[0]
223cabdff1aSopenharmony_ci.endif
224cabdff1aSopenharmony_ci        extmlal         q1,  q2,  q12, q13, q8,  q9,  q10, q11, 1, \size
225cabdff1aSopenharmony_ci        extmlal         q1,  q2,  q12, q13, q8,  q9,  q10, q11, 2, \size
226cabdff1aSopenharmony_ci        extmlal         q1,  q2,  q12, q13, q8,  q9,  q10, q11, 3, \size
227cabdff1aSopenharmony_ci        extmlal         q1,  q2,  q12, q13, q8,  q9,  q10, q11, 4, \size
228cabdff1aSopenharmony_ci        extmlal         q1,  q2,  q12, q13, q8,  q9,  q10, q11, 5, \size
229cabdff1aSopenharmony_ci        extmlal         q1,  q2,  q12, q13, q8,  q9,  q10, q11, 6, \size
230cabdff1aSopenharmony_ci        extmlal         q1,  q2,  q12, q13, q8,  q9,  q10, q11, 7, \size
231cabdff1aSopenharmony_ci
232cabdff1aSopenharmony_ci        @ Round, shift and saturate.
233cabdff1aSopenharmony_ci        @ The vqrshrun takes care of clamping negative values to zero, but
234cabdff1aSopenharmony_ci        @ we manually need to do vmin with the max pixel value.
235cabdff1aSopenharmony_ci        vqrshrun.s32    d2,  q1,  #7
236cabdff1aSopenharmony_ci        vqrshrun.s32    d24, q12, #7
237cabdff1aSopenharmony_ci.if \size >= 8
238cabdff1aSopenharmony_ci        vqrshrun.s32    d3,  q2,  #7
239cabdff1aSopenharmony_ci        vqrshrun.s32    d25, q13, #7
240cabdff1aSopenharmony_ci        vmin.u16        q1,  q1,  q3
241cabdff1aSopenharmony_ci        vmin.u16        q12, q12, q3
242cabdff1aSopenharmony_ci.else
243cabdff1aSopenharmony_ci        vmin.u16        d2,  d2,  d6
244cabdff1aSopenharmony_ci        vmin.u16        d24, d24, d6
245cabdff1aSopenharmony_ci.endif
246cabdff1aSopenharmony_ci        @ Average
247cabdff1aSopenharmony_ci.ifc \type,avg
248cabdff1aSopenharmony_ci.if \size >= 8
249cabdff1aSopenharmony_ci        vld1.16         {q14}, [r0,:128]
250cabdff1aSopenharmony_ci        vld1.16         {q15}, [r6,:128]
251cabdff1aSopenharmony_ci        vrhadd.u16      q1,  q1,  q14
252cabdff1aSopenharmony_ci        vrhadd.u16      q12, q12, q15
253cabdff1aSopenharmony_ci.else
254cabdff1aSopenharmony_ci        vld1.16         {d28}, [r0,:64]
255cabdff1aSopenharmony_ci        vld1.16         {d30}, [r6,:64]
256cabdff1aSopenharmony_ci        vrhadd.u16      d2,  d2,  d28
257cabdff1aSopenharmony_ci        vrhadd.u16      d24, d24, d30
258cabdff1aSopenharmony_ci.endif
259cabdff1aSopenharmony_ci.endif
260cabdff1aSopenharmony_ci        @ Store and loop horizontally (for size >= 8)
261cabdff1aSopenharmony_ci.if \size >= 8
262cabdff1aSopenharmony_ci        subs            r12, r12, #16
263cabdff1aSopenharmony_ci        vst1.16         {q1},  [r0,:128]!
264cabdff1aSopenharmony_ci        vst1.16         {q12}, [r6,:128]!
265cabdff1aSopenharmony_ci        beq             3f
        /* Slide the input window right by eight pixels: the upper half
         * becomes the lower half and a fresh 16 bytes are loaded on top. */
266cabdff1aSopenharmony_ci        vmov            q8,  q9
267cabdff1aSopenharmony_ci        vmov            q10, q11
268cabdff1aSopenharmony_ci        vld1.16         {q9},  [r2]!
269cabdff1aSopenharmony_ci        vld1.16         {q11}, [r7]!
270cabdff1aSopenharmony_ci        b               2b
271cabdff1aSopenharmony_ci.else @ \size == 4
272cabdff1aSopenharmony_ci        vst1.16         {d2},  [r0,:64]
273cabdff1aSopenharmony_ci        vst1.16         {d24}, [r6,:64]
274cabdff1aSopenharmony_ci.endif
275cabdff1aSopenharmony_ci3:
276cabdff1aSopenharmony_ci        @ Loop vertically
277cabdff1aSopenharmony_ci        add             r0,  r0,  r1
278cabdff1aSopenharmony_ci        add             r6,  r6,  r1
279cabdff1aSopenharmony_ci        add             r2,  r2,  r3
280cabdff1aSopenharmony_ci        add             r7,  r7,  r3
281cabdff1aSopenharmony_ci        subs            r4,  r4,  #2
282cabdff1aSopenharmony_ci        bne             1b
        /* Undo the push {r4-r7} done by the do_8tap_h_func wrapper. */
283cabdff1aSopenharmony_ci        pop             {r4-r7}
284cabdff1aSopenharmony_ci        bx              lr
285cabdff1aSopenharmony_ciendfunc
286cabdff1aSopenharmony_ci.endm
287cabdff1aSopenharmony_ci
/*
 * do_8tap_h_size size
 *
 * Instantiates both the put and the avg variant of the shared horizontal
 * filter body for the given size. The two invocations that follow create
 * put_8tap_4h, avg_8tap_4h, put_8tap_8h and avg_8tap_8h, which are the
 * branch targets used by do_8tap_h_func.
 */
288cabdff1aSopenharmony_ci.macro do_8tap_h_size size
289cabdff1aSopenharmony_cido_8tap_h put, \size
290cabdff1aSopenharmony_cido_8tap_h avg, \size
291cabdff1aSopenharmony_ci.endm
292cabdff1aSopenharmony_ci
293cabdff1aSopenharmony_cido_8tap_h_size 4
294cabdff1aSopenharmony_cido_8tap_h_size 8
295cabdff1aSopenharmony_ci
/*
 * do_8tap_h_func type, filter, offset, size, bpp
 *
 * Emits the exported entry point ff_vp9_<type>_<filter><size>_h_<bpp>_neon
 * (vp9_mc_func signature, documented at the top of this file). The entry
 * point only sets up registers and then branches to the shared body
 * <type>_8tap_8h (size >= 8) or <type>_8tap_4h; that body restores r4-r7
 * and returns to the caller.
 *   type   = put or avg
 *   filter = name used in the symbol (regular, sharp or smooth)
 *   offset = index of the coefficient bank inside ff_vp9_subpel_filters;
 *            banks are 256 bytes apart
 *   size   = block width in pixels
 *   bpp    = bit depth, used to build the clamp value
 *
 * Register state handed to the shared body:
 *   r0-r3 = dst, dst_stride, ref, ref_stride (unchanged arguments)
 *   r4    = h
 *   r5    = 2 * size (row width in bytes)
 *   r12   = ff_vp9_subpel_filters + 256 * offset + 16 * mx
 *   q3    = (1 << bpp) - 1 in every 16-bit lane
 * r6 is passed to movrelx as an extra register (presumably as scratch;
 * movrelx is defined outside this file).
 */
296cabdff1aSopenharmony_ci.macro do_8tap_h_func type, filter, offset, size, bpp
297cabdff1aSopenharmony_cifunction ff_vp9_\type\()_\filter\()\size\()_h_\bpp\()_neon, export=1
298cabdff1aSopenharmony_ci        push            {r4-r7}
        /* The 16-byte push moves the stack arguments: h is now at
         * [sp, #16] and mx at [sp, #20]. */
299cabdff1aSopenharmony_ci        ldr             r4,  [sp, #16]
300cabdff1aSopenharmony_ci        ldr             r5,  [sp, #20]
        /* Bitwise NOT of the mask with the low bpp bits cleared, which
         * leaves exactly the low bpp bits set: the largest pixel value. */
301cabdff1aSopenharmony_ci        vmvn.u16        q3,  #((0xffff << \bpp) & 0xffff)
302cabdff1aSopenharmony_ci        movrelx         r12, X(ff_vp9_subpel_filters), r6
303cabdff1aSopenharmony_ci        add             r12, r12, 256*\offset
        /* Each set of eight 16-bit coefficients is 16 bytes; select set mx. */
304cabdff1aSopenharmony_ci        add             r12, r12, r5, lsl #4
        /* mx is no longer needed; r5 now carries the row width in bytes. */
305cabdff1aSopenharmony_ci        mov             r5,  #2*\size
306cabdff1aSopenharmony_ci.if \size >= 8
307cabdff1aSopenharmony_ci        b               \type\()_8tap_8h
308cabdff1aSopenharmony_ci.else
309cabdff1aSopenharmony_ci        b               \type\()_8tap_4h
310cabdff1aSopenharmony_ci.endif
311cabdff1aSopenharmony_ciendfunc
312cabdff1aSopenharmony_ci.endm
313cabdff1aSopenharmony_ci
/*
 * do_8tap_h_filters size, bpp
 *
 * Emits the six horizontal entry points for one block size and bit depth:
 * put and avg for each of the three filter kinds. The numeric argument is
 * the coefficient bank index passed on to do_8tap_h_func:
 * smooth = 0, regular = 1, sharp = 2.
 */
314cabdff1aSopenharmony_ci.macro do_8tap_h_filters size, bpp
315cabdff1aSopenharmony_cido_8tap_h_func put, regular, 1, \size, \bpp
316cabdff1aSopenharmony_cido_8tap_h_func avg, regular, 1, \size, \bpp
317cabdff1aSopenharmony_cido_8tap_h_func put, sharp,   2, \size, \bpp
318cabdff1aSopenharmony_cido_8tap_h_func avg, sharp,   2, \size, \bpp
319cabdff1aSopenharmony_cido_8tap_h_func put, smooth,  0, \size, \bpp
320cabdff1aSopenharmony_cido_8tap_h_func avg, smooth,  0, \size, \bpp
321cabdff1aSopenharmony_ci.endm
322cabdff1aSopenharmony_ci
/*
 * do_8tap_h_filters_bpp bpp
 *
 * Emits the horizontal entry points for every supported block width
 * (64, 32, 16, 8 and 4 pixels) at the given bit depth. It is invoked below
 * for 10 and 12 bits per pixel.
 */
323cabdff1aSopenharmony_ci.macro do_8tap_h_filters_bpp bpp
324cabdff1aSopenharmony_cido_8tap_h_filters 64, \bpp
325cabdff1aSopenharmony_cido_8tap_h_filters 32, \bpp
326cabdff1aSopenharmony_cido_8tap_h_filters 16, \bpp
327cabdff1aSopenharmony_cido_8tap_h_filters 8,  \bpp
328cabdff1aSopenharmony_cido_8tap_h_filters 4,  \bpp
329cabdff1aSopenharmony_ci.endm
330cabdff1aSopenharmony_ci
331cabdff1aSopenharmony_cido_8tap_h_filters_bpp 10
332cabdff1aSopenharmony_cido_8tap_h_filters_bpp 12
333cabdff1aSopenharmony_ci
/* Emit the pending literal pool here, after the horizontal functions
 * (presumably so that literals generated by movrelx stay within reach of
 * the code above; movrelx is defined outside this file). */
334cabdff1aSopenharmony_ci.ltorg
335cabdff1aSopenharmony_ci
336cabdff1aSopenharmony_ci@ Vertical filters
337cabdff1aSopenharmony_ci
338cabdff1aSopenharmony_ci@ Round, shift and saturate and store qreg1-4
/*
 * do_store4 qreg1, dreg1, qreg2, dreg2, qreg3, dreg3, qreg4, dreg4,
 *           tmp1, tmp2, tmp3, tmp4, minreg, type
 *
 * Finishes four rows of four pixels each.
 *   qregN  = q register with the 32-bit filter sums of row N
 *   dregN  = d register receiving row N narrowed to 16 bits
 *   tmpN   = d register used to load the existing dst row N (avg only)
 *   minreg = d register holding the maximum pixel value in every lane
 *   type   = put or avg
 * Each sum is rounded and shifted right by 7 with unsigned saturation,
 * clamped to minreg and, for avg, rounding-averaged with the existing dst
 * pixels before being stored.
 * Rows are written through r0; for avg the existing pixels are read through
 * r6. Both pointers advance by r1 per row and use a 64-bit alignment hint.
 */
339cabdff1aSopenharmony_ci.macro do_store4 qreg1, dreg1, qreg2, dreg2, qreg3, dreg3, qreg4, dreg4, tmp1, tmp2, tmp3, tmp4, minreg, type
340cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg1,  \qreg1, #7
341cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg2,  \qreg2, #7
342cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg3,  \qreg3, #7
343cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg4,  \qreg4, #7
344cabdff1aSopenharmony_ci.ifc \type,avg
345cabdff1aSopenharmony_ci        vld1.16         {\tmp1},  [r6,:64], r1
346cabdff1aSopenharmony_ci        vld1.16         {\tmp2},  [r6,:64], r1
347cabdff1aSopenharmony_ci        vld1.16         {\tmp3},  [r6,:64], r1
348cabdff1aSopenharmony_ci        vld1.16         {\tmp4},  [r6,:64], r1
349cabdff1aSopenharmony_ci.endif
350cabdff1aSopenharmony_ci        vmin.u16        \dreg1,  \dreg1,  \minreg
351cabdff1aSopenharmony_ci        vmin.u16        \dreg2,  \dreg2,  \minreg
352cabdff1aSopenharmony_ci        vmin.u16        \dreg3,  \dreg3,  \minreg
353cabdff1aSopenharmony_ci        vmin.u16        \dreg4,  \dreg4,  \minreg
354cabdff1aSopenharmony_ci.ifc \type,avg
355cabdff1aSopenharmony_ci        vrhadd.u16      \dreg1,  \dreg1,  \tmp1
356cabdff1aSopenharmony_ci        vrhadd.u16      \dreg2,  \dreg2,  \tmp2
357cabdff1aSopenharmony_ci        vrhadd.u16      \dreg3,  \dreg3,  \tmp3
358cabdff1aSopenharmony_ci        vrhadd.u16      \dreg4,  \dreg4,  \tmp4
359cabdff1aSopenharmony_ci.endif
360cabdff1aSopenharmony_ci        vst1.16         {\dreg1}, [r0,:64], r1
361cabdff1aSopenharmony_ci        vst1.16         {\dreg2}, [r0,:64], r1
362cabdff1aSopenharmony_ci        vst1.16         {\dreg3}, [r0,:64], r1
363cabdff1aSopenharmony_ci        vst1.16         {\dreg4}, [r0,:64], r1
364cabdff1aSopenharmony_ci.endm
365cabdff1aSopenharmony_ci
366cabdff1aSopenharmony_ci@ Round, shift and saturate and store qreg1-4
367cabdff1aSopenharmony_ci@ qreg1-2 belong to one line and qreg3-4 to the second line.
368cabdff1aSopenharmony_ci@ dreg1-2 == qreg1, dreg3-4 == qreg2.
/*
 * do_store8 qreg1, qreg2, qreg3, qreg4, dreg1, dreg2, dreg3, dreg4,
 *           minreg, type
 *
 * Finishes two rows of eight pixels each.
 *   qreg1-4 = q registers with the 32-bit filter sums (two per row)
 *   dreg1-4 = d registers receiving the narrowed 16-bit results; they must
 *             be the halves of qreg1 and qreg2 as stated above
 *   minreg  = q register holding the maximum pixel value in every lane
 *   type    = put or avg
 * After narrowing, qreg1 holds the complete first row and qreg2 the
 * complete second row; only those two registers are clamped and stored.
 * For avg, qreg3 and qreg4 have been consumed by then and are reused as
 * temporaries for the existing dst rows.
 * Rows are written through r0; for avg the existing pixels are read through
 * r6. Both pointers advance by r1 per row and use a 128-bit alignment hint.
 */
369cabdff1aSopenharmony_ci.macro do_store8 qreg1, qreg2, qreg3, qreg4, dreg1, dreg2, dreg3, dreg4, minreg, type
370cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg1,  \qreg1, #7
371cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg2,  \qreg2, #7
372cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg3,  \qreg3, #7
373cabdff1aSopenharmony_ci        vqrshrun.s32    \dreg4,  \qreg4, #7
374cabdff1aSopenharmony_ci.ifc \type,avg
375cabdff1aSopenharmony_ci        vld1.16         {\qreg3},  [r6,:128], r1
376cabdff1aSopenharmony_ci        vld1.16         {\qreg4},  [r6,:128], r1
377cabdff1aSopenharmony_ci.endif
378cabdff1aSopenharmony_ci        vmin.u16        \qreg1,  \qreg1,  \minreg
379cabdff1aSopenharmony_ci        vmin.u16        \qreg2,  \qreg2,  \minreg
380cabdff1aSopenharmony_ci.ifc \type,avg
381cabdff1aSopenharmony_ci        vrhadd.u16      \qreg1,  \qreg1,  \qreg3
382cabdff1aSopenharmony_ci        vrhadd.u16      \qreg2,  \qreg2,  \qreg4
383cabdff1aSopenharmony_ci.endif
384cabdff1aSopenharmony_ci        vst1.16         {\qreg1}, [r0,:128], r1
385cabdff1aSopenharmony_ci        vst1.16         {\qreg2}, [r0,:128], r1
386cabdff1aSopenharmony_ci.endm
387cabdff1aSopenharmony_ci
388cabdff1aSopenharmony_ci@ Evaluate the filter twice in parallel, from the inputs src1-src9 into dst1-dst2
389cabdff1aSopenharmony_ci@ (src1-src8 into dst1, src2-src9 into dst2).
/*
 * convolve4 dst1, dst2, src1 ... src9, tmp1, tmp2
 *   dst1, dst2  = q registers receiving the two 32-bit filter sums
 *   src1 - src9 = d registers with nine consecutive inputs of four signed
 *                 16-bit values each
 *   tmp1, tmp2  = q registers used as second accumulators (clobbered)
 * The coefficients are read from d0 and d1. Each result is built in two
 * independent accumulators, the even-numbered taps in dstN and the
 * odd-numbered taps in tmpN, which are added together at the end.
 */
390cabdff1aSopenharmony_ci.macro convolve4 dst1, dst2, src1, src2, src3, src4, src5, src6, src7, src8, src9, tmp1, tmp2
391cabdff1aSopenharmony_ci        vmull.s16       \dst1, \src1, d0[0]
392cabdff1aSopenharmony_ci        vmull.s16       \dst2, \src2, d0[0]
393cabdff1aSopenharmony_ci        vmull.s16       \tmp1, \src2, d0[1]
394cabdff1aSopenharmony_ci        vmull.s16       \tmp2, \src3, d0[1]
395cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src3, d0[2]
396cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src4, d0[2]
397cabdff1aSopenharmony_ci        vmlal.s16       \tmp1, \src4, d0[3]
398cabdff1aSopenharmony_ci        vmlal.s16       \tmp2, \src5, d0[3]
399cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src5, d1[0]
400cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src6, d1[0]
401cabdff1aSopenharmony_ci        vmlal.s16       \tmp1, \src6, d1[1]
402cabdff1aSopenharmony_ci        vmlal.s16       \tmp2, \src7, d1[1]
403cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src7, d1[2]
404cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src8, d1[2]
405cabdff1aSopenharmony_ci        vmlal.s16       \tmp1, \src8, d1[3]
406cabdff1aSopenharmony_ci        vmlal.s16       \tmp2, \src9, d1[3]
        /* Combine the even-tap and odd-tap partial sums. */
407cabdff1aSopenharmony_ci        vadd.s32        \dst1, \dst1, \tmp1
408cabdff1aSopenharmony_ci        vadd.s32        \dst2, \dst2, \tmp2
409cabdff1aSopenharmony_ci.endm
410cabdff1aSopenharmony_ci
411cabdff1aSopenharmony_ci@ Evaluate the filter twice in parallel. This does the same as convolve4 above,
412cabdff1aSopenharmony_ci@ but with double width (two input/output registers per row).
/*
 * convolve8 dst1, dst2, dst3, dst4, src1 ... src18
 *   dst1, dst2   = q accumulators for the first result
 *   dst3, dst4   = q accumulators for the second result
 *   src1 - src18 = d registers forming nine consecutive inputs of two d
 *                  registers each (src1/src2 is the first input, src3/src4
 *                  the second, and so on)
 * The first result is filtered from inputs 1-8 (src1-src16) and the second
 * from inputs 2-9 (src3-src18). The coefficients are read from d0 and d1.
 * Unlike convolve4 there are no temporaries: every tap is accumulated
 * straight into the destination registers.
 *
 * Register aliasing: src1 and src2 are read only by the first two
 * instructions, before dst3 and dst4 are first written. Some call sites in
 * do_8tap_8v rely on this and pass as dst4 the q register that contains
 * src1/src2, so the instruction order below must be preserved.
 */
413cabdff1aSopenharmony_ci.macro convolve8 dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10, src11, src12, src13, src14, src15, src16, src17, src18
414cabdff1aSopenharmony_ci        vmull.s16       \dst1, \src1,  d0[0]
415cabdff1aSopenharmony_ci        vmull.s16       \dst2, \src2,  d0[0]
416cabdff1aSopenharmony_ci        vmull.s16       \dst3, \src3,  d0[0]
417cabdff1aSopenharmony_ci        vmull.s16       \dst4, \src4,  d0[0]
418cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src3,  d0[1]
419cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src4,  d0[1]
420cabdff1aSopenharmony_ci        vmlal.s16       \dst3, \src5,  d0[1]
421cabdff1aSopenharmony_ci        vmlal.s16       \dst4, \src6,  d0[1]
422cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src5,  d0[2]
423cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src6,  d0[2]
424cabdff1aSopenharmony_ci        vmlal.s16       \dst3, \src7,  d0[2]
425cabdff1aSopenharmony_ci        vmlal.s16       \dst4, \src8,  d0[2]
426cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src7,  d0[3]
427cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src8,  d0[3]
428cabdff1aSopenharmony_ci        vmlal.s16       \dst3, \src9,  d0[3]
429cabdff1aSopenharmony_ci        vmlal.s16       \dst4, \src10, d0[3]
430cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src9,  d1[0]
431cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src10, d1[0]
432cabdff1aSopenharmony_ci        vmlal.s16       \dst3, \src11, d1[0]
433cabdff1aSopenharmony_ci        vmlal.s16       \dst4, \src12, d1[0]
434cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src11, d1[1]
435cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src12, d1[1]
436cabdff1aSopenharmony_ci        vmlal.s16       \dst3, \src13, d1[1]
437cabdff1aSopenharmony_ci        vmlal.s16       \dst4, \src14, d1[1]
438cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src13, d1[2]
439cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src14, d1[2]
440cabdff1aSopenharmony_ci        vmlal.s16       \dst3, \src15, d1[2]
441cabdff1aSopenharmony_ci        vmlal.s16       \dst4, \src16, d1[2]
442cabdff1aSopenharmony_ci        vmlal.s16       \dst1, \src15, d1[3]
443cabdff1aSopenharmony_ci        vmlal.s16       \dst2, \src16, d1[3]
444cabdff1aSopenharmony_ci        vmlal.s16       \dst3, \src17, d1[3]
445cabdff1aSopenharmony_ci        vmlal.s16       \dst4, \src18, d1[3]
446cabdff1aSopenharmony_ci.endm
447cabdff1aSopenharmony_ci
448cabdff1aSopenharmony_ci@ Instantiate a vertical filter function for filtering 8 pixels at a time.
449cabdff1aSopenharmony_ci@ The height is passed in r4, the width in r5 and the filter coefficients
450cabdff1aSopenharmony_ci@ in r12.
/*
 * do_8tap_8v type
 *   type = put (store the filtered pixels) or avg (rounding-average them
 *          with the pixels already in dst before storing)
 *
 * The generated function <type>_8tap_8v is a shared tail: it finishes with
 * vpop {q4-q7} and pop {r4-r6}, so the code branching here is expected to
 * have pushed those registers (that code is not part of this macro).
 *
 * Register use on entry:
 *   r0  = dst             r1  = dst stride
 *   r2  = src             r3  = src stride
 *   r4  = height          r5  = width in pixels (consumed 8 per column)
 *   r12 = pointer to eight 16-bit coefficients (128-bit aligned)
 *   q1  = per-lane maximum pixel value; it is passed to do_store8 as the
 *         clamp register and is never written here, so the caller is
 *         expected to have set it up
 * Internally q0 holds the coefficients, r12 is reused as the per-column
 * row counter and, for avg, r6 is the dst read pointer.
 *
 * Structure: the outer loop (label 1) handles one 8-pixel-wide column. It
 * preloads seven input rows; each step of the inner loop (label 2) loads
 * four more rows and produces four output rows. The inner loop is unrolled
 * three times, with the role of every q register rotating from one step to
 * the next so that no register moves are needed. The row counter drops by
 * 4 per step and the loop ends only when it reaches exactly zero, so the
 * height is expected to be a nonzero multiple of 4.
 */
451cabdff1aSopenharmony_ci.macro do_8tap_8v type
452cabdff1aSopenharmony_cifunction \type\()_8tap_8v
        /* Start three rows above the first output row. */
453cabdff1aSopenharmony_ci        sub             r2,  r2,  r3, lsl #1
454cabdff1aSopenharmony_ci        sub             r2,  r2,  r3
455cabdff1aSopenharmony_ci        vld1.16         {q0},  [r12, :128]
456cabdff1aSopenharmony_ci1:
457cabdff1aSopenharmony_ci.ifc \type,avg
458cabdff1aSopenharmony_ci        mov             r6,  r0
459cabdff1aSopenharmony_ci.endif
460cabdff1aSopenharmony_ci        mov             r12, r4
461cabdff1aSopenharmony_ci
462cabdff1aSopenharmony_ci        vld1.16         {q5},  [r2], r3
463cabdff1aSopenharmony_ci        vld1.16         {q6},  [r2], r3
464cabdff1aSopenharmony_ci        vld1.16         {q7},  [r2], r3
465cabdff1aSopenharmony_ci        vld1.16         {q8},  [r2], r3
466cabdff1aSopenharmony_ci        vld1.16         {q9},  [r2], r3
467cabdff1aSopenharmony_ci        vld1.16         {q10}, [r2], r3
468cabdff1aSopenharmony_ci        vld1.16         {q11}, [r2], r3
469cabdff1aSopenharmony_ci2:
        /* Step A: the input rows are q5-q15; sums go to q2-q5. */
470cabdff1aSopenharmony_ci        vld1.16         {q12}, [r2], r3
471cabdff1aSopenharmony_ci        vld1.16         {q13}, [r2], r3
472cabdff1aSopenharmony_ci        vld1.16         {q14}, [r2], r3
473cabdff1aSopenharmony_ci        vld1.16         {q15}, [r2], r3
474cabdff1aSopenharmony_ci        convolve8       q2,  q3,  q4,  q5,  d10, d11, d12, d13, d14, d15, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27
475cabdff1aSopenharmony_ci        do_store8       q2,  q3,  q4,  q5,  d4,  d5,  d6,  d7,  q1,  \type
476cabdff1aSopenharmony_ci        convolve8       q2,  q3,  q4,  q5,  d14, d15, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
477cabdff1aSopenharmony_ci        do_store8       q2,  q3,  q4,  q5,  d4,  d5,  d6,  d7,  q1,  \type
478cabdff1aSopenharmony_ci
479cabdff1aSopenharmony_ci        subs            r12, r12, #4
480cabdff1aSopenharmony_ci        beq             8f
481cabdff1aSopenharmony_ci
        /* Step B: the input rows are q9-q15 followed by the newly loaded
         * q4-q7; sums go to q2, q3, q8 and q9. */
482cabdff1aSopenharmony_ci        vld1.16         {q4},  [r2], r3
483cabdff1aSopenharmony_ci        vld1.16         {q5},  [r2], r3
484cabdff1aSopenharmony_ci        vld1.16         {q6},  [r2], r3
485cabdff1aSopenharmony_ci        vld1.16         {q7},  [r2], r3
486cabdff1aSopenharmony_ci        convolve8       q2,  q3,  q8,  q9,  d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, d8,  d9,  d10, d11
487cabdff1aSopenharmony_ci        do_store8       q2,  q3,  q8,  q9,  d4,  d5,  d6,  d7,  q1,  \type
488cabdff1aSopenharmony_ci        convolve8       q2,  q3,  q8,  q9,  d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, d8,  d9,  d10, d11, d12, d13, d14, d15
489cabdff1aSopenharmony_ci        do_store8       q2,  q3,  q8,  q9,  d4,  d5,  d6,  d7,  q1,  \type
490cabdff1aSopenharmony_ci
491cabdff1aSopenharmony_ci        subs            r12, r12, #4
492cabdff1aSopenharmony_ci        beq             8f
493cabdff1aSopenharmony_ci
        /* Step C: the input rows are q13-q15, q4-q7 and the newly loaded
         * q8-q11; sums go to q2, q3, q12 and q13. Afterwards q5-q11 hold
         * the last seven rows again, as step A expects. */
494cabdff1aSopenharmony_ci        vld1.16         {q8},  [r2], r3
495cabdff1aSopenharmony_ci        vld1.16         {q9},  [r2], r3
496cabdff1aSopenharmony_ci        vld1.16         {q10}, [r2], r3
497cabdff1aSopenharmony_ci        vld1.16         {q11}, [r2], r3
498cabdff1aSopenharmony_ci        convolve8       q2,  q3,  q12, q13, d26, d27, d28, d29, d30, d31, d8,  d9,  d10, d11, d12, d13, d14, d15, d16, d17, d18, d19
499cabdff1aSopenharmony_ci        do_store8       q2,  q3,  q12, q13, d4,  d5,  d6,  d7,  q1,  \type
500cabdff1aSopenharmony_ci        convolve8       q2,  q3,  q12, q13, d30, d31, d8,  d9,  d10, d11, d12, d13, d14, d15, d16, d17, d18, d19, d20, d21, d22, d23
501cabdff1aSopenharmony_ci        do_store8       q2,  q3,  q12, q13, d4,  d5,  d6,  d7,  q1,  \type
502cabdff1aSopenharmony_ci
503cabdff1aSopenharmony_ci        subs            r12, r12, #4
504cabdff1aSopenharmony_ci        bne             2b
505cabdff1aSopenharmony_ci
506cabdff1aSopenharmony_ci8:
        /* Column finished: either return, or rewind both pointers to the
         * top and move 8 pixels (16 bytes) to the right. src was advanced
         * by h + 7 rows in total, hence the -h, -8 and +1 stride steps. */
507cabdff1aSopenharmony_ci        subs            r5,  r5,  #8
508cabdff1aSopenharmony_ci        beq             9f
509cabdff1aSopenharmony_ci        @ r0 -= h * dst_stride
510cabdff1aSopenharmony_ci        mls             r0,  r1,  r4, r0
511cabdff1aSopenharmony_ci        @ r2 -= h * src_stride
512cabdff1aSopenharmony_ci        mls             r2,  r3,  r4, r2
513cabdff1aSopenharmony_ci        @ r2 -= 8 * src_stride
514cabdff1aSopenharmony_ci        sub             r2,  r2,  r3, lsl #3
515cabdff1aSopenharmony_ci        @ r2 += 1 * src_stride
516cabdff1aSopenharmony_ci        add             r2,  r2,  r3
517cabdff1aSopenharmony_ci        add             r2,  r2,  #16
518cabdff1aSopenharmony_ci        add             r0,  r0,  #16
519cabdff1aSopenharmony_ci        b               1b
520cabdff1aSopenharmony_ci9:
521cabdff1aSopenharmony_ci        vpop            {q4-q7}
522cabdff1aSopenharmony_ci        pop             {r4-r6}
523cabdff1aSopenharmony_ci        bx              lr
524cabdff1aSopenharmony_ciendfunc
525cabdff1aSopenharmony_ci.endm
526cabdff1aSopenharmony_ci
@ Emit the put and avg variants of the 8v vertical filter worker.
do_8tap_8v put
do_8tap_8v avg
529cabdff1aSopenharmony_ci
@ Instantiate a vertical filter function for filtering a 4 pixels wide
@ slice. This is only designed to work for 4 or 8 output lines.
@
@ The generated function is not exported. It is reached by a tail branch
@ from the wrappers created by do_8tap_v_func, which push r4-r6 and set up:
@   r0  = dst            r1  = dst_stride
@   r2  = src            r3  = src_stride
@   r4  = h (number of output lines)
@   r12 = pointer to the selected filter coefficients
@   q1  = per-lane maximum pixel value, (1 << bpp) - 1
@ For sizes below 8 the wrappers do not vpush q4-q7, so only r4-r6 are
@ restored here before returning to the original caller.
@ NOTE(review): convolve4 and do_store4 are defined earlier in the file;
@ the remarks about them below are read off their operand lists only.
.macro do_8tap_4v type
function \type\()_8tap_4v
        @ r2 -= 3 * src_stride: begin three rows above the first output row.
        sub             r2,  r2,  r3, lsl #1
        sub             r2,  r2,  r3
        @ q0 = 16 bytes of filter coefficients (eight 16 bit values).
        vld1.16         {q0},  [r12, :128]
.ifc \type,avg
        @ Keep a second copy of dst in r6; presumably do_store4 reads the
        @ existing destination through it when averaging.
        mov             r6,  r0
.endif

        @ Load 11 input rows of 4 pixels each (one d register per row):
        @ enough for 4 output rows plus 7 rows of filter context.
        vld1.16         {d16}, [r2], r3
        vld1.16         {d17}, [r2], r3
        vld1.16         {d18}, [r2], r3
        vld1.16         {d19}, [r2], r3
        vld1.16         {d20}, [r2], r3
        vld1.16         {d21}, [r2], r3
        vld1.16         {d22}, [r2], r3
        vld1.16         {d23}, [r2], r3
        vld1.16         {d24}, [r2], r3
        vld1.16         {d25}, [r2], r3
        vld1.16         {d26}, [r2], r3
        @ Each convolve4 is handed nine consecutive rows and two result
        @ registers; the second call starts two rows further down.
        convolve4       q2,  q3,  d16, d17, d18, d19, d20, d21, d22, d23, d24, q14, q15
        convolve4       q14, q15, d18, d19, d20, d21, d22, d23, d24, d25, d26, q8,  q9
        do_store4       q2,  d4,  q3,  d6,  q14, d28, q15, d30, d5,  d7,  d29, d31, d2,  \type

        @ Four lines done; finished if h was 4.
        subs            r4,  r4,  #4
        beq             9f

        @ Second group of four lines: rows d20-d26 are reused as context,
        @ only four new rows need to be loaded.
        vld1.16         {d27}, [r2], r3
        vld1.16         {d28}, [r2], r3
        vld1.16         {d29}, [r2], r3
        vld1.16         {d30}, [r2], r3
        convolve4       q2,  q3,  d20, d21, d22, d23, d24, d25, d26, d27, d28, q8,  q9
        convolve4       q8,  q9,  d22, d23, d24, d25, d26, d27, d28, d29, d30, q10, q11
        do_store4       q2,  d4,  q3,  d6,  q8,  d16, q9,  d18, d5,  d7,  d17, d19, d2,  \type

9:
        @ Undo the push done by the wrapper that branched here.
        pop             {r4-r6}
        bx              lr
endfunc
.endm
572cabdff1aSopenharmony_ci
@ Emit the put and avg variants of the 4 pixels wide vertical filter worker.
do_8tap_4v put
do_8tap_4v avg
575cabdff1aSopenharmony_ci
@ Generate one exported vertical 8-tap entry point with the vp9_mc_func
@ signature. It only sets up registers and then tail-branches into the
@ shared put/avg worker (the 8v worker for sizes of 8 and up, the 4v
@ worker otherwise); the worker restores the saved registers and returns.
@   type   - put or avg
@   filter - name used in the symbol (regular, sharp, smooth)
@   offset - index of the 256 byte filter bank in ff_vp9_subpel_filters
@   size   - block width in pixels
@   bpp    - bit depth, used to build the pixel clamp mask
.macro do_8tap_v_func type, filter, offset, size, bpp
function ff_vp9_\type\()_\filter\()\size\()_v_\bpp\()_neon, export=1
        push            {r4-r6}
        @ Stack arguments sit 12 bytes higher after the push:
        @ r4 = h (5th argument), r5 = my (7th argument).
        ldr             r4,  [sp, #12]
        ldr             r5,  [sp, #20]
.if \size >= 8
        @ The 8v worker uses d8-d15 (q4-q7), which are callee-saved,
        @ and pops them again before returning.
        vpush           {q4-q7}
.endif
        @ q1 = (1 << bpp) - 1 in every 16 bit lane, built as the bitwise
        @ NOT of the immediate.
        vmvn.u16        q1,  #((0xffff << \bpp) & 0xffff)
        movrelx         r12, X(ff_vp9_subpel_filters), r6
        @ r12 += 256 * offset selects the filter bank, then r12 += my * 16
        @ selects one 16 byte filter within it.
        add             r12, r12, 256*\offset
        add             r12, r12, r5, lsl #4
        @ r5 is reused from here on to carry the block width.
        mov             r5,  #\size
.if \size >= 8
        b               \type\()_8tap_8v
.else
        b               \type\()_8tap_4v
.endif
endfunc
.endm
596cabdff1aSopenharmony_ci
@ Generate the put and avg vertical entry points for all three filter
@ kinds at one block size and bit depth. The numeric argument is the
@ filter bank index handed to do_8tap_v_func: regular = 1, sharp = 2,
@ smooth = 0.
.macro do_8tap_v_filters size, bpp
do_8tap_v_func put, regular, 1, \size, \bpp
do_8tap_v_func avg, regular, 1, \size, \bpp
do_8tap_v_func put, sharp,   2, \size, \bpp
do_8tap_v_func avg, sharp,   2, \size, \bpp
do_8tap_v_func put, smooth,  0, \size, \bpp
do_8tap_v_func avg, smooth,  0, \size, \bpp
.endm
605cabdff1aSopenharmony_ci
@ Generate the vertical entry points for every supported block width
@ (64, 32, 16, 8 and 4 pixels) at the given bit depth.
.macro do_8tap_v_filters_bpp bpp
do_8tap_v_filters 64, \bpp
do_8tap_v_filters 32, \bpp
do_8tap_v_filters 16, \bpp
do_8tap_v_filters 8,  \bpp
do_8tap_v_filters 4,  \bpp
.endm
613cabdff1aSopenharmony_ci
@ Emit the full set of vertical 8-tap entry points for 10 and 12 bit
@ content.
do_8tap_v_filters_bpp 10
do_8tap_v_filters_bpp 12
616