/*
 * HEVC video decoder
 *
 * Copyright (C) 2012 - 2013 Guillaume Martres
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22cabdff1aSopenharmony_ci
23cabdff1aSopenharmony_ci#include "get_bits.h"
24cabdff1aSopenharmony_ci#include "hevcdec.h"
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci#include "bit_depth_template.c"
27cabdff1aSopenharmony_ci#include "hevcdsp.h"
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_cistatic void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
30cabdff1aSopenharmony_ci                          GetBitContext *gb, int pcm_bit_depth)
31cabdff1aSopenharmony_ci{
32cabdff1aSopenharmony_ci    int x, y;
33cabdff1aSopenharmony_ci    pixel *dst = (pixel *)_dst;
34cabdff1aSopenharmony_ci
35cabdff1aSopenharmony_ci    stride /= sizeof(pixel);
36cabdff1aSopenharmony_ci
37cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
38cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
39cabdff1aSopenharmony_ci            dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
40cabdff1aSopenharmony_ci        dst += stride;
41cabdff1aSopenharmony_ci    }
42cabdff1aSopenharmony_ci}
43cabdff1aSopenharmony_ci
44cabdff1aSopenharmony_cistatic av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res,
45cabdff1aSopenharmony_ci                                                ptrdiff_t stride, int size)
46cabdff1aSopenharmony_ci{
47cabdff1aSopenharmony_ci    int x, y;
48cabdff1aSopenharmony_ci    pixel *dst = (pixel *)_dst;
49cabdff1aSopenharmony_ci
50cabdff1aSopenharmony_ci    stride /= sizeof(pixel);
51cabdff1aSopenharmony_ci
52cabdff1aSopenharmony_ci    for (y = 0; y < size; y++) {
53cabdff1aSopenharmony_ci        for (x = 0; x < size; x++) {
54cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(dst[x] + *res);
55cabdff1aSopenharmony_ci            res++;
56cabdff1aSopenharmony_ci        }
57cabdff1aSopenharmony_ci        dst += stride;
58cabdff1aSopenharmony_ci    }
59cabdff1aSopenharmony_ci}
60cabdff1aSopenharmony_ci
/* Fixed-size entry point: forwards to add_residual() with size 4. */
static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res,
                                  ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 4);
}
66cabdff1aSopenharmony_ci
/* Fixed-size entry point: forwards to add_residual() with size 8. */
static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res,
                                  ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 8);
}
72cabdff1aSopenharmony_ci
/* Fixed-size entry point: forwards to add_residual() with size 16. */
static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res,
                                    ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 16);
}
78cabdff1aSopenharmony_ci
/* Fixed-size entry point: forwards to add_residual() with size 32. */
static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res,
                                    ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 32);
}
84cabdff1aSopenharmony_ci
85cabdff1aSopenharmony_cistatic void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
86cabdff1aSopenharmony_ci{
87cabdff1aSopenharmony_ci    int16_t *coeffs = (int16_t *) _coeffs;
88cabdff1aSopenharmony_ci    int x, y;
89cabdff1aSopenharmony_ci    int size = 1 << log2_size;
90cabdff1aSopenharmony_ci
91cabdff1aSopenharmony_ci    if (mode) {
92cabdff1aSopenharmony_ci        coeffs += size;
93cabdff1aSopenharmony_ci        for (y = 0; y < size - 1; y++) {
94cabdff1aSopenharmony_ci            for (x = 0; x < size; x++)
95cabdff1aSopenharmony_ci                coeffs[x] += coeffs[x - size];
96cabdff1aSopenharmony_ci            coeffs += size;
97cabdff1aSopenharmony_ci        }
98cabdff1aSopenharmony_ci    } else {
99cabdff1aSopenharmony_ci        for (y = 0; y < size; y++) {
100cabdff1aSopenharmony_ci            for (x = 1; x < size; x++)
101cabdff1aSopenharmony_ci                coeffs[x] += coeffs[x - 1];
102cabdff1aSopenharmony_ci            coeffs += size;
103cabdff1aSopenharmony_ci        }
104cabdff1aSopenharmony_ci    }
105cabdff1aSopenharmony_ci}
106cabdff1aSopenharmony_ci
107cabdff1aSopenharmony_cistatic void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
108cabdff1aSopenharmony_ci{
109cabdff1aSopenharmony_ci    int shift  = 15 - BIT_DEPTH - log2_size;
110cabdff1aSopenharmony_ci    int x, y;
111cabdff1aSopenharmony_ci    int size = 1 << log2_size;
112cabdff1aSopenharmony_ci
113cabdff1aSopenharmony_ci    if (shift > 0) {
114cabdff1aSopenharmony_ci        int offset = 1 << (shift - 1);
115cabdff1aSopenharmony_ci        for (y = 0; y < size; y++) {
116cabdff1aSopenharmony_ci            for (x = 0; x < size; x++) {
117cabdff1aSopenharmony_ci                *coeffs = (*coeffs + offset) >> shift;
118cabdff1aSopenharmony_ci                coeffs++;
119cabdff1aSopenharmony_ci            }
120cabdff1aSopenharmony_ci        }
121cabdff1aSopenharmony_ci    } else {
122cabdff1aSopenharmony_ci        for (y = 0; y < size; y++) {
123cabdff1aSopenharmony_ci            for (x = 0; x < size; x++) {
124cabdff1aSopenharmony_ci                *coeffs = *(uint16_t*)coeffs << -shift;
125cabdff1aSopenharmony_ci                coeffs++;
126cabdff1aSopenharmony_ci            }
127cabdff1aSopenharmony_ci        }
128cabdff1aSopenharmony_ci    }
129cabdff1aSopenharmony_ci}
130cabdff1aSopenharmony_ci
/*
 * Store policies handed to the transform macros below as their `assign`
 * argument.
 *   SET   stores the value unchanged.
 *   SCALE adds `add`, shifts right by `shift` and passes the result through
 *         av_clip_int16(); `add` and `shift` are not parameters, they must
 *         be variables in scope where the macro is expanded.
 */
#define SET(dst, x)   (dst) = (x)
#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
133cabdff1aSopenharmony_ci
/*
 * One 4-point pass of the transform used by transform_4x4_luma().
 *
 * Reads src[0], src[step], src[2 * step], src[3 * step] and writes the four
 * results to the same offsets of dst through `assign` (SET or SCALE).
 * The first store evaluates its src operands before writing and the other
 * three only use the temporaries c0..c3, so dst may be the same pointer
 * as src.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                             \
    do {                                                                \
        int c0 = src[0 * step] + src[2 * step];                         \
        int c1 = src[2 * step] + src[3 * step];                         \
        int c2 = src[0 * step] - src[3 * step];                         \
        int c3 = 74 * src[1 * step];                                    \
                                                                        \
        assign(dst[2 * step], 74 * (src[0 * step] -                     \
                                    src[2 * step] +                     \
                                    src[3 * step]));                    \
        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                  \
        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                  \
        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                  \
    } while (0)
148cabdff1aSopenharmony_ci
149cabdff1aSopenharmony_cistatic void FUNC(transform_4x4_luma)(int16_t *coeffs)
150cabdff1aSopenharmony_ci{
151cabdff1aSopenharmony_ci    int i;
152cabdff1aSopenharmony_ci    int shift    = 7;
153cabdff1aSopenharmony_ci    int add      = 1 << (shift - 1);
154cabdff1aSopenharmony_ci    int16_t *src = coeffs;
155cabdff1aSopenharmony_ci
156cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
157cabdff1aSopenharmony_ci        TR_4x4_LUMA(src, src, 4, SCALE);
158cabdff1aSopenharmony_ci        src++;
159cabdff1aSopenharmony_ci    }
160cabdff1aSopenharmony_ci
161cabdff1aSopenharmony_ci    shift = 20 - BIT_DEPTH;
162cabdff1aSopenharmony_ci    add   = 1 << (shift - 1);
163cabdff1aSopenharmony_ci    for (i = 0; i < 4; i++) {
164cabdff1aSopenharmony_ci        TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
165cabdff1aSopenharmony_ci        coeffs += 4;
166cabdff1aSopenharmony_ci    }
167cabdff1aSopenharmony_ci}
168cabdff1aSopenharmony_ci
169cabdff1aSopenharmony_ci#undef TR_4x4_LUMA
170cabdff1aSopenharmony_ci
/*
 * 4-point butterfly.
 *
 * Reads four inputs from src at element stride sstep, forms an even part
 * (e0, e1) from inputs 0 and 2 and an odd part (o0, o1) from inputs 1 and 3,
 * and stores the four sums/differences to dst at element stride dstep
 * through `assign`.
 * All inputs are consumed into the temporaries before the first store.
 * The `end` parameter is accepted for signature parity with the larger
 * TR_* macros but is not used here.
 */
#define TR_4(dst, src, dstep, sstep, assign, end)                 \
    do {                                                          \
        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
                                                                  \
        assign(dst[0 * dstep], e0 + o0);                          \
        assign(dst[1 * dstep], e1 + o1);                          \
        assign(dst[2 * dstep], e1 - o1);                          \
        assign(dst[3 * dstep], e0 - o0);                          \
    } while (0)
183cabdff1aSopenharmony_ci
/*
 * 8-point butterfly.
 *
 * Odd part: o_8[i] accumulates transform[4 * j][i] * src[j * sstep] over the
 * odd input indices j < end (`transform` is a coefficient table defined
 * outside this chunk). Even part: TR_4 applied to the inputs at stride
 * 2 * sstep, stored unscaled into e_8.
 * Output i is e + o and output 7 - i is e - o, written through `assign`.
 * Both parts are computed before the first store.
 * The macro declares its own `i` and `j`, so its arguments must not refer
 * to outer variables with those names.
 */
#define TR_8(dst, src, dstep, sstep, assign, end)                 \
    do {                                                          \
        int i, j;                                                 \
        int e_8[4];                                               \
        int o_8[4] = { 0 };                                       \
        for (i = 0; i < 4; i++)                                   \
            for (j = 1; j < end; j += 2)                          \
                o_8[i] += transform[4 * j][i] * src[j * sstep];   \
        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                     \
                                                                  \
        for (i = 0; i < 4; i++) {                                 \
            assign(dst[i * dstep], e_8[i] + o_8[i]);              \
            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);        \
        }                                                         \
    } while (0)
199cabdff1aSopenharmony_ci
/*
 * 16-point butterfly.
 *
 * Odd part: o_16[i] accumulates transform[2 * j][i] * src[j * sstep] over
 * the odd input indices j < end. Even part: TR_8 applied to the inputs at
 * stride 2 * sstep with a fixed bound of 8, stored unscaled into e_16.
 * Output i is e + o and output 15 - i is e - o, written through `assign`.
 * The macro declares its own `i` and `j`, so its arguments must not refer
 * to outer variables with those names.
 */
#define TR_16(dst, src, dstep, sstep, assign, end)                \
    do {                                                          \
        int i, j;                                                 \
        int e_16[8];                                              \
        int o_16[8] = { 0 };                                      \
        for (i = 0; i < 8; i++)                                   \
            for (j = 1; j < end; j += 2)                          \
                o_16[i] += transform[2 * j][i] * src[j * sstep];  \
        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                    \
                                                                  \
        for (i = 0; i < 8; i++) {                                 \
            assign(dst[i * dstep], e_16[i] + o_16[i]);            \
            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);     \
        }                                                         \
    } while (0)
215cabdff1aSopenharmony_ci
/*
 * 32-point butterfly.
 *
 * Odd part: o_32[i] accumulates transform[j][i] * src[j * sstep] over the
 * odd input indices j < end. Even part: TR_16 applied to the inputs at
 * stride 2 * sstep with the bound halved (end / 2), stored unscaled into
 * e_32.
 * Output i is e + o and output 31 - i is e - o, written through `assign`.
 * The macro declares its own `i` and `j`, so its arguments must not refer
 * to outer variables with those names.
 */
#define TR_32(dst, src, dstep, sstep, assign, end)                \
    do {                                                          \
        int i, j;                                                 \
        int e_32[16];                                             \
        int o_32[16] = { 0 };                                     \
        for (i = 0; i < 16; i++)                                  \
            for (j = 1; j < end; j += 2)                          \
                o_32[i] += transform[j][i] * src[j * sstep];      \
        TR_16(e_32, src, 1, 2 * sstep, SET, end / 2);             \
                                                                  \
        for (i = 0; i < 16; i++) {                                \
            assign(dst[i * dstep], e_32[i] + o_32[i]);            \
            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);     \
        }                                                         \
    } while (0)
231cabdff1aSopenharmony_ci
/*
 * Per-size local declarations spliced into the functions generated by
 * IDCT(H). They derive the `end` bounds passed to the TR_* macros from the
 * function's col_limit argument, capped at H:
 *   limit  = min(col_limit, H)      - used by the second pass
 *   limit2 = min(col_limit + 4, H)  - used by the first pass
 * The 4-point variant declares only limit2: TR_4 never expands its `end`
 * argument, so `limit` is not needed when H is 4.
 */
#define IDCT_VAR4(H)                                              \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR8(H)                                              \
    int limit  = FFMIN(col_limit, H);                             \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR16(H)   IDCT_VAR8(H)
#define IDCT_VAR32(H)   IDCT_VAR8(H)
239cabdff1aSopenharmony_ci
/*
 * Generates idct_HxH(coeffs, col_limit): an in-place two-pass transform of
 * an H x H coefficient block built on TR_H.
 *
 * First pass: one TR_H per column (element stride H), shift 7, input bound
 * limit2. After columns 4, 8, 12, ... the bound is lowered by 4 whenever it
 * is still below H (presumably because later columns hold fewer non-zero
 * coefficients - confirm against the caller's definition of col_limit).
 * Second pass: one TR_H per row, shift 20 - BIT_DEPTH, input bound limit.
 * Both passes store through SCALE, which reads the local `shift` and `add`.
 */
#define IDCT(H)                                                   \
static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs,          \
                                        int col_limit)            \
{                                                                 \
    int i;                                                        \
    int      shift = 7;                                           \
    int      add   = 1 << (shift - 1);                            \
    int16_t *src   = coeffs;                                      \
    IDCT_VAR ## H(H);                                             \
                                                                  \
    for (i = 0; i < H; i++) {                                     \
        TR_ ## H(src, src, H, H, SCALE, limit2);                  \
        if (limit2 < H && i%4 == 0 && !!i)                        \
            limit2 -= 4;                                          \
        src++;                                                    \
    }                                                             \
                                                                  \
    shift = 20 - BIT_DEPTH;                                       \
    add   = 1 << (shift - 1);                                     \
    for (i = 0; i < H; i++) {                                     \
        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);             \
        coeffs += H;                                              \
    }                                                             \
}
264cabdff1aSopenharmony_ci
/*
 * Generates idct_HxH_dc(coeffs): the DC-only shortcut.
 *
 * A single value is derived from coeffs[0] - halved with rounding, then
 * shifted right by 14 - BIT_DEPTH with rounding - and written to all
 * H * H entries of the block.
 */
#define IDCT_DC(H)                                                \
static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)    \
{                                                                 \
    int i, j;                                                     \
    int shift = 14 - BIT_DEPTH;                                   \
    int add   = 1 << (shift - 1);                                 \
    int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift;          \
                                                                  \
    for (j = 0; j < H; j++) {                                     \
        for (i = 0; i < H; i++) {                                 \
            coeffs[i + j * H] = coeff;                            \
        }                                                         \
    }                                                             \
}
279cabdff1aSopenharmony_ci
/* Instantiate the full transforms for the four supported block sizes. */
IDCT( 4)
IDCT( 8)
IDCT(16)
IDCT(32)

/* Instantiate the DC-only shortcuts for the same sizes. */
IDCT_DC( 4)
IDCT_DC( 8)
IDCT_DC(16)
IDCT_DC(32)

/* The butterfly and store-policy macros are not needed past this point. */
#undef TR_4
#undef TR_8
#undef TR_16
#undef TR_32

#undef SET
#undef SCALE
297cabdff1aSopenharmony_ci
298cabdff1aSopenharmony_cistatic void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
299cabdff1aSopenharmony_ci                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
300cabdff1aSopenharmony_ci                                  int16_t *sao_offset_val, int sao_left_class,
301cabdff1aSopenharmony_ci                                  int width, int height)
302cabdff1aSopenharmony_ci{
303cabdff1aSopenharmony_ci    pixel *dst = (pixel *)_dst;
304cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
305cabdff1aSopenharmony_ci    int offset_table[32] = { 0 };
306cabdff1aSopenharmony_ci    int k, y, x;
307cabdff1aSopenharmony_ci    int shift  = BIT_DEPTH - 5;
308cabdff1aSopenharmony_ci
309cabdff1aSopenharmony_ci    stride_dst /= sizeof(pixel);
310cabdff1aSopenharmony_ci    stride_src /= sizeof(pixel);
311cabdff1aSopenharmony_ci
312cabdff1aSopenharmony_ci    for (k = 0; k < 4; k++)
313cabdff1aSopenharmony_ci        offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
314cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
315cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
316cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(src[x] + offset_table[(src[x] >> shift) & 31]);
317cabdff1aSopenharmony_ci        dst += stride_dst;
318cabdff1aSopenharmony_ci        src += stride_src;
319cabdff1aSopenharmony_ci    }
320cabdff1aSopenharmony_ci}
321cabdff1aSopenharmony_ci
322cabdff1aSopenharmony_ci#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
323cabdff1aSopenharmony_ci
324cabdff1aSopenharmony_cistatic void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
325cabdff1aSopenharmony_ci                                  int eo, int width, int height) {
326cabdff1aSopenharmony_ci
327cabdff1aSopenharmony_ci    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
328cabdff1aSopenharmony_ci    static const int8_t pos[4][2][2] = {
329cabdff1aSopenharmony_ci        { { -1,  0 }, {  1, 0 } }, // horizontal
330cabdff1aSopenharmony_ci        { {  0, -1 }, {  0, 1 } }, // vertical
331cabdff1aSopenharmony_ci        { { -1, -1 }, {  1, 1 } }, // 45 degree
332cabdff1aSopenharmony_ci        { {  1, -1 }, { -1, 1 } }, // 135 degree
333cabdff1aSopenharmony_ci    };
334cabdff1aSopenharmony_ci    pixel *dst = (pixel *)_dst;
335cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
336cabdff1aSopenharmony_ci    int a_stride, b_stride;
337cabdff1aSopenharmony_ci    int x, y;
338cabdff1aSopenharmony_ci    ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
339cabdff1aSopenharmony_ci    stride_dst /= sizeof(pixel);
340cabdff1aSopenharmony_ci
341cabdff1aSopenharmony_ci    a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
342cabdff1aSopenharmony_ci    b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
343cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
344cabdff1aSopenharmony_ci        for (x = 0; x < width; x++) {
345cabdff1aSopenharmony_ci            int diff0 = CMP(src[x], src[x + a_stride]);
346cabdff1aSopenharmony_ci            int diff1 = CMP(src[x], src[x + b_stride]);
347cabdff1aSopenharmony_ci            int offset_val        = edge_idx[2 + diff0 + diff1];
348cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]);
349cabdff1aSopenharmony_ci        }
350cabdff1aSopenharmony_ci        src += stride_src;
351cabdff1aSopenharmony_ci        dst += stride_dst;
352cabdff1aSopenharmony_ci    }
353cabdff1aSopenharmony_ci}
354cabdff1aSopenharmony_ci
355cabdff1aSopenharmony_cistatic void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src,
356cabdff1aSopenharmony_ci                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
357cabdff1aSopenharmony_ci                                    int *borders, int _width, int _height,
358cabdff1aSopenharmony_ci                                    int c_idx, uint8_t *vert_edge,
359cabdff1aSopenharmony_ci                                    uint8_t *horiz_edge, uint8_t *diag_edge)
360cabdff1aSopenharmony_ci{
361cabdff1aSopenharmony_ci    int x, y;
362cabdff1aSopenharmony_ci    pixel *dst = (pixel *)_dst;
363cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
364cabdff1aSopenharmony_ci    int16_t *sao_offset_val = sao->offset_val[c_idx];
365cabdff1aSopenharmony_ci    int sao_eo_class    = sao->eo_class[c_idx];
366cabdff1aSopenharmony_ci    int init_x = 0, width = _width, height = _height;
367cabdff1aSopenharmony_ci
368cabdff1aSopenharmony_ci    stride_dst /= sizeof(pixel);
369cabdff1aSopenharmony_ci    stride_src /= sizeof(pixel);
370cabdff1aSopenharmony_ci
371cabdff1aSopenharmony_ci    if (sao_eo_class != SAO_EO_VERT) {
372cabdff1aSopenharmony_ci        if (borders[0]) {
373cabdff1aSopenharmony_ci            int offset_val = sao_offset_val[0];
374cabdff1aSopenharmony_ci            for (y = 0; y < height; y++) {
375cabdff1aSopenharmony_ci                dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
376cabdff1aSopenharmony_ci            }
377cabdff1aSopenharmony_ci            init_x = 1;
378cabdff1aSopenharmony_ci        }
379cabdff1aSopenharmony_ci        if (borders[2]) {
380cabdff1aSopenharmony_ci            int offset_val = sao_offset_val[0];
381cabdff1aSopenharmony_ci            int offset     = width - 1;
382cabdff1aSopenharmony_ci            for (x = 0; x < height; x++) {
383cabdff1aSopenharmony_ci                dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
384cabdff1aSopenharmony_ci            }
385cabdff1aSopenharmony_ci            width--;
386cabdff1aSopenharmony_ci        }
387cabdff1aSopenharmony_ci    }
388cabdff1aSopenharmony_ci    if (sao_eo_class != SAO_EO_HORIZ) {
389cabdff1aSopenharmony_ci        if (borders[1]) {
390cabdff1aSopenharmony_ci            int offset_val = sao_offset_val[0];
391cabdff1aSopenharmony_ci            for (x = init_x; x < width; x++)
392cabdff1aSopenharmony_ci                dst[x] = av_clip_pixel(src[x] + offset_val);
393cabdff1aSopenharmony_ci        }
394cabdff1aSopenharmony_ci        if (borders[3]) {
395cabdff1aSopenharmony_ci            int offset_val   = sao_offset_val[0];
396cabdff1aSopenharmony_ci            ptrdiff_t y_stride_dst = stride_dst * (height - 1);
397cabdff1aSopenharmony_ci            ptrdiff_t y_stride_src = stride_src * (height - 1);
398cabdff1aSopenharmony_ci            for (x = init_x; x < width; x++)
399cabdff1aSopenharmony_ci                dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
400cabdff1aSopenharmony_ci            height--;
401cabdff1aSopenharmony_ci        }
402cabdff1aSopenharmony_ci    }
403cabdff1aSopenharmony_ci}
404cabdff1aSopenharmony_ci
/**
 * Rewrite the border samples of a block for the edge-offset filter and
 * then copy back source samples along the edges that are flagged as not
 * modifiable.
 *
 * The first half treats every side flagged in borders[] (left, top, right,
 * bottom): destination samples on that side become the source sample plus
 * offset_val[c_idx][0], clipped with av_clip_pixel(). While doing so it
 * shrinks the working rectangle (init_x, init_y, width, height) so that the
 * second half skips what was already written.
 *
 * The second half copies source samples straight to the destination on the
 * left/right columns (vert_edge[0]/[1]), the top/bottom rows
 * (horiz_edge[0]/[1]) and the four corners (diag_edge[0..3] = upper-left,
 * upper-right, lower-right, lower-left), depending on the edge-offset class.
 *
 * @param _dst       destination block, accessed as an array of pixel
 * @param _src       source block, accessed as an array of pixel
 * @param stride_dst distance between destination rows, in bytes
 * @param stride_src distance between source rows, in bytes
 * @param sao        SAO parameters; offset_val and eo_class are read
 * @param borders    flags for { left, top, right, bottom }
 * @param _width     block width in samples
 * @param _height    block height in samples
 * @param c_idx      component index into the SAO parameter arrays
 * @param vert_edge  flags for restoring the { left, right } column
 * @param horiz_edge flags for restoring the { top, bottom } row
 * @param diag_edge  flags for restoring the four corner samples
 */
static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
                                    int *borders, int _width, int _height,
                                    int c_idx, uint8_t *vert_edge,
                                    uint8_t *horiz_edge, uint8_t *diag_edge)
{
    int x, y;
    pixel *dst = (pixel *)_dst;
    pixel *src = (pixel *)_src;
    int16_t *sao_offset_val = sao->offset_val[c_idx];
    int sao_eo_class    = sao->eo_class[c_idx];
    int init_x = 0, init_y = 0, width = _width, height = _height;

    // from here on both strides are counted in pixels, not bytes
    stride_dst /= sizeof(pixel);
    stride_src /= sizeof(pixel);

    if (sao_eo_class != SAO_EO_VERT) {
        if (borders[0]) {
            // left column
            int offset_val = sao_offset_val[0];
            for (y = 0; y < height; y++) {
                dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
            }
            init_x = 1;
        }
        if (borders[2]) {
            // right column; x is used as the row index here
            int offset_val = sao_offset_val[0];
            int offset     = width - 1;
            for (x = 0; x < height; x++) {
                dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
            }
            width--;
        }
    }
    if (sao_eo_class != SAO_EO_HORIZ) {
        if (borders[1]) {
            // top row, minus any column already written above
            int offset_val = sao_offset_val[0];
            for (x = init_x; x < width; x++)
                dst[x] = av_clip_pixel(src[x] + offset_val);
            init_y = 1;
        }
        if (borders[3]) {
            // bottom row, minus any column already written above
            int offset_val   = sao_offset_val[0];
            ptrdiff_t y_stride_dst = stride_dst * (height - 1);
            ptrdiff_t y_stride_src = stride_src * (height - 1);
            for (x = init_x; x < width; x++)
                dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
            height--;
        }
    }

    // NOTE: width and height below are the possibly reduced values left by
    // the border handling above, so "width-1" / "height-1" address the last
    // column / row of the remaining rectangle.
    {
        // A corner is "saved" (left out of the row/column copies that meet
        // there) when its diag_edge flag is clear, the class is the diagonal
        // passing through that corner, and neither adjacent side is a border.
        int save_upper_left  = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
        int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D  && !borders[1] && !borders[2];
        int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
        int save_lower_left  = !diag_edge[3] && sao_eo_class == SAO_EO_45D  && !borders[0] && !borders[3];

        // Restore pixels that can't be modified
        if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
            // left column
            for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
                dst[y*stride_dst] = src[y*stride_src];
        }
        if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
            // right column
            for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
                dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
        }

        if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
            // top row
            for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
                dst[x] = src[x];
        }
        if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
            // bottom row
            for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
                dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
        }
        // single corner samples: upper-left, upper-right, lower-right, lower-left
        if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
            dst[0] = src[0];
        if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
            dst[width-1] = src[width-1];
        if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
            dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
        if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
            dst[stride_dst*(height-1)] = src[stride_src*(height-1)];

    }
}

#undef CMP
492cabdff1aSopenharmony_ci
493cabdff1aSopenharmony_ci////////////////////////////////////////////////////////////////////////////////
494cabdff1aSopenharmony_ci//
495cabdff1aSopenharmony_ci////////////////////////////////////////////////////////////////////////////////
496cabdff1aSopenharmony_cistatic void FUNC(put_hevc_pel_pixels)(int16_t *dst,
497cabdff1aSopenharmony_ci                                      uint8_t *_src, ptrdiff_t _srcstride,
498cabdff1aSopenharmony_ci                                      int height, intptr_t mx, intptr_t my, int width)
499cabdff1aSopenharmony_ci{
500cabdff1aSopenharmony_ci    int x, y;
501cabdff1aSopenharmony_ci    pixel *src          = (pixel *)_src;
502cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
503cabdff1aSopenharmony_ci
504cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
505cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
506cabdff1aSopenharmony_ci            dst[x] = src[x] << (14 - BIT_DEPTH);
507cabdff1aSopenharmony_ci        src += srcstride;
508cabdff1aSopenharmony_ci        dst += MAX_PB_SIZE;
509cabdff1aSopenharmony_ci    }
510cabdff1aSopenharmony_ci}
511cabdff1aSopenharmony_ci
512cabdff1aSopenharmony_cistatic void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
513cabdff1aSopenharmony_ci                                          int height, intptr_t mx, intptr_t my, int width)
514cabdff1aSopenharmony_ci{
515cabdff1aSopenharmony_ci    int y;
516cabdff1aSopenharmony_ci    pixel *src          = (pixel *)_src;
517cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
518cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
519cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
520cabdff1aSopenharmony_ci
521cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
522cabdff1aSopenharmony_ci        memcpy(dst, src, width * sizeof(pixel));
523cabdff1aSopenharmony_ci        src += srcstride;
524cabdff1aSopenharmony_ci        dst += dststride;
525cabdff1aSopenharmony_ci    }
526cabdff1aSopenharmony_ci}
527cabdff1aSopenharmony_ci
528cabdff1aSopenharmony_cistatic void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
529cabdff1aSopenharmony_ci                                         int16_t *src2,
530cabdff1aSopenharmony_ci                                         int height, intptr_t mx, intptr_t my, int width)
531cabdff1aSopenharmony_ci{
532cabdff1aSopenharmony_ci    int x, y;
533cabdff1aSopenharmony_ci    pixel *src          = (pixel *)_src;
534cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
535cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
536cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
537cabdff1aSopenharmony_ci
538cabdff1aSopenharmony_ci    int shift = 14  + 1 - BIT_DEPTH;
539cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
540cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
541cabdff1aSopenharmony_ci#else
542cabdff1aSopenharmony_ci    int offset = 0;
543cabdff1aSopenharmony_ci#endif
544cabdff1aSopenharmony_ci
545cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
546cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
547cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
548cabdff1aSopenharmony_ci        src  += srcstride;
549cabdff1aSopenharmony_ci        dst  += dststride;
550cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
551cabdff1aSopenharmony_ci    }
552cabdff1aSopenharmony_ci}
553cabdff1aSopenharmony_ci
554cabdff1aSopenharmony_cistatic void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
555cabdff1aSopenharmony_ci                                            int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
556cabdff1aSopenharmony_ci{
557cabdff1aSopenharmony_ci    int x, y;
558cabdff1aSopenharmony_ci    pixel *src          = (pixel *)_src;
559cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
560cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
561cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
562cabdff1aSopenharmony_ci    int shift = denom + 14 - BIT_DEPTH;
563cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
564cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
565cabdff1aSopenharmony_ci#else
566cabdff1aSopenharmony_ci    int offset = 0;
567cabdff1aSopenharmony_ci#endif
568cabdff1aSopenharmony_ci
569cabdff1aSopenharmony_ci    ox     = ox * (1 << (BIT_DEPTH - 8));
570cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
571cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
572cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
573cabdff1aSopenharmony_ci        src += srcstride;
574cabdff1aSopenharmony_ci        dst += dststride;
575cabdff1aSopenharmony_ci    }
576cabdff1aSopenharmony_ci}
577cabdff1aSopenharmony_ci
578cabdff1aSopenharmony_cistatic void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
579cabdff1aSopenharmony_ci                                           int16_t *src2,
580cabdff1aSopenharmony_ci                                           int height, int denom, int wx0, int wx1,
581cabdff1aSopenharmony_ci                                           int ox0, int ox1, intptr_t mx, intptr_t my, int width)
582cabdff1aSopenharmony_ci{
583cabdff1aSopenharmony_ci    int x, y;
584cabdff1aSopenharmony_ci    pixel *src          = (pixel *)_src;
585cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
586cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
587cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
588cabdff1aSopenharmony_ci
589cabdff1aSopenharmony_ci    int shift = 14  + 1 - BIT_DEPTH;
590cabdff1aSopenharmony_ci    int log2Wd = denom + shift - 1;
591cabdff1aSopenharmony_ci
592cabdff1aSopenharmony_ci    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
593cabdff1aSopenharmony_ci    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
594cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
595cabdff1aSopenharmony_ci        for (x = 0; x < width; x++) {
596cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
597cabdff1aSopenharmony_ci        }
598cabdff1aSopenharmony_ci        src  += srcstride;
599cabdff1aSopenharmony_ci        dst  += dststride;
600cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
601cabdff1aSopenharmony_ci    }
602cabdff1aSopenharmony_ci}
603cabdff1aSopenharmony_ci
604cabdff1aSopenharmony_ci////////////////////////////////////////////////////////////////////////////////
605cabdff1aSopenharmony_ci//
606cabdff1aSopenharmony_ci////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter sum centred on src[x]: taps are read at offsets
 * -3 * stride .. +4 * stride and multiplied by filter[0] .. filter[7].
 *
 * The macro relies on `filter` (at least 8 coefficients) and `x` being in
 * scope where it is expanded. Both arguments are parenthesized so that an
 * expression (e.g. `a + b`) can be passed safely; note that each argument is
 * still evaluated eight times, so it must be free of side effects.
 */
#define QPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - 3 * (stride)] +                                     \
     filter[1] * (src)[x - 2 * (stride)] +                                     \
     filter[2] * (src)[x -     (stride)] +                                     \
     filter[3] * (src)[x               ] +                                     \
     filter[4] * (src)[x +     (stride)] +                                     \
     filter[5] * (src)[x + 2 * (stride)] +                                     \
     filter[6] * (src)[x + 3 * (stride)] +                                     \
     filter[7] * (src)[x + 4 * (stride)])
616cabdff1aSopenharmony_ci
617cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_h)(int16_t *dst,
618cabdff1aSopenharmony_ci                                  uint8_t *_src, ptrdiff_t _srcstride,
619cabdff1aSopenharmony_ci                                  int height, intptr_t mx, intptr_t my, int width)
620cabdff1aSopenharmony_ci{
621cabdff1aSopenharmony_ci    int x, y;
622cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
623cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
624cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
625cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
626cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
627cabdff1aSopenharmony_ci            dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
628cabdff1aSopenharmony_ci        src += srcstride;
629cabdff1aSopenharmony_ci        dst += MAX_PB_SIZE;
630cabdff1aSopenharmony_ci    }
631cabdff1aSopenharmony_ci}
632cabdff1aSopenharmony_ci
633cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_v)(int16_t *dst,
634cabdff1aSopenharmony_ci                                  uint8_t *_src, ptrdiff_t _srcstride,
635cabdff1aSopenharmony_ci                                  int height, intptr_t mx, intptr_t my, int width)
636cabdff1aSopenharmony_ci{
637cabdff1aSopenharmony_ci    int x, y;
638cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
639cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
640cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
641cabdff1aSopenharmony_ci    for (y = 0; y < height; y++)  {
642cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
643cabdff1aSopenharmony_ci            dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
644cabdff1aSopenharmony_ci        src += srcstride;
645cabdff1aSopenharmony_ci        dst += MAX_PB_SIZE;
646cabdff1aSopenharmony_ci    }
647cabdff1aSopenharmony_ci}
648cabdff1aSopenharmony_ci
649cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_hv)(int16_t *dst,
650cabdff1aSopenharmony_ci                                   uint8_t *_src,
651cabdff1aSopenharmony_ci                                   ptrdiff_t _srcstride,
652cabdff1aSopenharmony_ci                                   int height, intptr_t mx,
653cabdff1aSopenharmony_ci                                   intptr_t my, int width)
654cabdff1aSopenharmony_ci{
655cabdff1aSopenharmony_ci    int x, y;
656cabdff1aSopenharmony_ci    const int8_t *filter;
657cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;
658cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
659cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
660cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
661cabdff1aSopenharmony_ci
662cabdff1aSopenharmony_ci    src   -= QPEL_EXTRA_BEFORE * srcstride;
663cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];
664cabdff1aSopenharmony_ci    for (y = 0; y < height + QPEL_EXTRA; y++) {
665cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
666cabdff1aSopenharmony_ci            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
667cabdff1aSopenharmony_ci        src += srcstride;
668cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
669cabdff1aSopenharmony_ci    }
670cabdff1aSopenharmony_ci
671cabdff1aSopenharmony_ci    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
672cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];
673cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
674cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
675cabdff1aSopenharmony_ci            dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
676cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
677cabdff1aSopenharmony_ci        dst += MAX_PB_SIZE;
678cabdff1aSopenharmony_ci    }
679cabdff1aSopenharmony_ci}
680cabdff1aSopenharmony_ci
681cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst,  ptrdiff_t _dststride,
682cabdff1aSopenharmony_ci                                      uint8_t *_src, ptrdiff_t _srcstride,
683cabdff1aSopenharmony_ci                                      int height, intptr_t mx, intptr_t my, int width)
684cabdff1aSopenharmony_ci{
685cabdff1aSopenharmony_ci    int x, y;
686cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
687cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
688cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
689cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
690cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
691cabdff1aSopenharmony_ci    int shift = 14 - BIT_DEPTH;
692cabdff1aSopenharmony_ci
693cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
694cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
695cabdff1aSopenharmony_ci#else
696cabdff1aSopenharmony_ci    int offset = 0;
697cabdff1aSopenharmony_ci#endif
698cabdff1aSopenharmony_ci
699cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
700cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
701cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
702cabdff1aSopenharmony_ci        src += srcstride;
703cabdff1aSopenharmony_ci        dst += dststride;
704cabdff1aSopenharmony_ci    }
705cabdff1aSopenharmony_ci}
706cabdff1aSopenharmony_ci
707cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
708cabdff1aSopenharmony_ci                                     int16_t *src2,
709cabdff1aSopenharmony_ci                                     int height, intptr_t mx, intptr_t my, int width)
710cabdff1aSopenharmony_ci{
711cabdff1aSopenharmony_ci    int x, y;
712cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
713cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
714cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
715cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
716cabdff1aSopenharmony_ci
717cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
718cabdff1aSopenharmony_ci
719cabdff1aSopenharmony_ci    int shift = 14  + 1 - BIT_DEPTH;
720cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
721cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
722cabdff1aSopenharmony_ci#else
723cabdff1aSopenharmony_ci    int offset = 0;
724cabdff1aSopenharmony_ci#endif
725cabdff1aSopenharmony_ci
726cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
727cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
728cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
729cabdff1aSopenharmony_ci        src  += srcstride;
730cabdff1aSopenharmony_ci        dst  += dststride;
731cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
732cabdff1aSopenharmony_ci    }
733cabdff1aSopenharmony_ci}
734cabdff1aSopenharmony_ci
735cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst,  ptrdiff_t _dststride,
736cabdff1aSopenharmony_ci                                     uint8_t *_src, ptrdiff_t _srcstride,
737cabdff1aSopenharmony_ci                                     int height, intptr_t mx, intptr_t my, int width)
738cabdff1aSopenharmony_ci{
739cabdff1aSopenharmony_ci    int x, y;
740cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
741cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
742cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
743cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
744cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
745cabdff1aSopenharmony_ci    int shift = 14 - BIT_DEPTH;
746cabdff1aSopenharmony_ci
747cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
748cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
749cabdff1aSopenharmony_ci#else
750cabdff1aSopenharmony_ci    int offset = 0;
751cabdff1aSopenharmony_ci#endif
752cabdff1aSopenharmony_ci
753cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
754cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
755cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
756cabdff1aSopenharmony_ci        src += srcstride;
757cabdff1aSopenharmony_ci        dst += dststride;
758cabdff1aSopenharmony_ci    }
759cabdff1aSopenharmony_ci}
760cabdff1aSopenharmony_ci
761cabdff1aSopenharmony_ci
762cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
763cabdff1aSopenharmony_ci                                     int16_t *src2,
764cabdff1aSopenharmony_ci                                     int height, intptr_t mx, intptr_t my, int width)
765cabdff1aSopenharmony_ci{
766cabdff1aSopenharmony_ci    int x, y;
767cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
768cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
769cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
770cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
771cabdff1aSopenharmony_ci
772cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
773cabdff1aSopenharmony_ci
774cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
775cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
776cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
777cabdff1aSopenharmony_ci#else
778cabdff1aSopenharmony_ci    int offset = 0;
779cabdff1aSopenharmony_ci#endif
780cabdff1aSopenharmony_ci
781cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
782cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
783cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
784cabdff1aSopenharmony_ci        src  += srcstride;
785cabdff1aSopenharmony_ci        dst  += dststride;
786cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
787cabdff1aSopenharmony_ci    }
788cabdff1aSopenharmony_ci}
789cabdff1aSopenharmony_ci
790cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst,  ptrdiff_t _dststride,
791cabdff1aSopenharmony_ci                                       uint8_t *_src, ptrdiff_t _srcstride,
792cabdff1aSopenharmony_ci                                       int height, intptr_t mx, intptr_t my, int width)
793cabdff1aSopenharmony_ci{
794cabdff1aSopenharmony_ci    int x, y;
795cabdff1aSopenharmony_ci    const int8_t *filter;
796cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;
797cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
798cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
799cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
800cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
801cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
802cabdff1aSopenharmony_ci    int shift =  14 - BIT_DEPTH;
803cabdff1aSopenharmony_ci
804cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
805cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
806cabdff1aSopenharmony_ci#else
807cabdff1aSopenharmony_ci    int offset = 0;
808cabdff1aSopenharmony_ci#endif
809cabdff1aSopenharmony_ci
810cabdff1aSopenharmony_ci    src   -= QPEL_EXTRA_BEFORE * srcstride;
811cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];
812cabdff1aSopenharmony_ci    for (y = 0; y < height + QPEL_EXTRA; y++) {
813cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
814cabdff1aSopenharmony_ci            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
815cabdff1aSopenharmony_ci        src += srcstride;
816cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
817cabdff1aSopenharmony_ci    }
818cabdff1aSopenharmony_ci
819cabdff1aSopenharmony_ci    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
820cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];
821cabdff1aSopenharmony_ci
822cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
823cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
824cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
825cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
826cabdff1aSopenharmony_ci        dst += dststride;
827cabdff1aSopenharmony_ci    }
828cabdff1aSopenharmony_ci}
829cabdff1aSopenharmony_ci
830cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
831cabdff1aSopenharmony_ci                                      int16_t *src2,
832cabdff1aSopenharmony_ci                                      int height, intptr_t mx, intptr_t my, int width)
833cabdff1aSopenharmony_ci{
834cabdff1aSopenharmony_ci    int x, y;
835cabdff1aSopenharmony_ci    const int8_t *filter;
836cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;
837cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
838cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
839cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
840cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
841cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
842cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
843cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
844cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
845cabdff1aSopenharmony_ci#else
846cabdff1aSopenharmony_ci    int offset = 0;
847cabdff1aSopenharmony_ci#endif
848cabdff1aSopenharmony_ci
849cabdff1aSopenharmony_ci    src   -= QPEL_EXTRA_BEFORE * srcstride;
850cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];
851cabdff1aSopenharmony_ci    for (y = 0; y < height + QPEL_EXTRA; y++) {
852cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
853cabdff1aSopenharmony_ci            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
854cabdff1aSopenharmony_ci        src += srcstride;
855cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
856cabdff1aSopenharmony_ci    }
857cabdff1aSopenharmony_ci
858cabdff1aSopenharmony_ci    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
859cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];
860cabdff1aSopenharmony_ci
861cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
862cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
863cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
864cabdff1aSopenharmony_ci        tmp  += MAX_PB_SIZE;
865cabdff1aSopenharmony_ci        dst  += dststride;
866cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
867cabdff1aSopenharmony_ci    }
868cabdff1aSopenharmony_ci}
869cabdff1aSopenharmony_ci
870cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst,  ptrdiff_t _dststride,
871cabdff1aSopenharmony_ci                                        uint8_t *_src, ptrdiff_t _srcstride,
872cabdff1aSopenharmony_ci                                        int height, int denom, int wx, int ox,
873cabdff1aSopenharmony_ci                                        intptr_t mx, intptr_t my, int width)
874cabdff1aSopenharmony_ci{
875cabdff1aSopenharmony_ci    int x, y;
876cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
877cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
878cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
879cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
880cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
881cabdff1aSopenharmony_ci    int shift = denom + 14 - BIT_DEPTH;
882cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
883cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
884cabdff1aSopenharmony_ci#else
885cabdff1aSopenharmony_ci    int offset = 0;
886cabdff1aSopenharmony_ci#endif
887cabdff1aSopenharmony_ci
888cabdff1aSopenharmony_ci    ox = ox * (1 << (BIT_DEPTH - 8));
889cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
890cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
891cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
892cabdff1aSopenharmony_ci        src += srcstride;
893cabdff1aSopenharmony_ci        dst += dststride;
894cabdff1aSopenharmony_ci    }
895cabdff1aSopenharmony_ci}
896cabdff1aSopenharmony_ci
897cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
898cabdff1aSopenharmony_ci                                       int16_t *src2,
899cabdff1aSopenharmony_ci                                       int height, int denom, int wx0, int wx1,
900cabdff1aSopenharmony_ci                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
901cabdff1aSopenharmony_ci{
902cabdff1aSopenharmony_ci    int x, y;
903cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
904cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
905cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
906cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
907cabdff1aSopenharmony_ci
908cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
909cabdff1aSopenharmony_ci
910cabdff1aSopenharmony_ci    int shift = 14  + 1 - BIT_DEPTH;
911cabdff1aSopenharmony_ci    int log2Wd = denom + shift - 1;
912cabdff1aSopenharmony_ci
913cabdff1aSopenharmony_ci    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
914cabdff1aSopenharmony_ci    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
915cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
916cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
917cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
918cabdff1aSopenharmony_ci                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
919cabdff1aSopenharmony_ci        src  += srcstride;
920cabdff1aSopenharmony_ci        dst  += dststride;
921cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
922cabdff1aSopenharmony_ci    }
923cabdff1aSopenharmony_ci}
924cabdff1aSopenharmony_ci
925cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst,  ptrdiff_t _dststride,
926cabdff1aSopenharmony_ci                                        uint8_t *_src, ptrdiff_t _srcstride,
927cabdff1aSopenharmony_ci                                        int height, int denom, int wx, int ox,
928cabdff1aSopenharmony_ci                                        intptr_t mx, intptr_t my, int width)
929cabdff1aSopenharmony_ci{
930cabdff1aSopenharmony_ci    int x, y;
931cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
932cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
933cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
934cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
935cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
936cabdff1aSopenharmony_ci    int shift = denom + 14 - BIT_DEPTH;
937cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
938cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
939cabdff1aSopenharmony_ci#else
940cabdff1aSopenharmony_ci    int offset = 0;
941cabdff1aSopenharmony_ci#endif
942cabdff1aSopenharmony_ci
943cabdff1aSopenharmony_ci    ox = ox * (1 << (BIT_DEPTH - 8));
944cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
945cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
946cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
947cabdff1aSopenharmony_ci        src += srcstride;
948cabdff1aSopenharmony_ci        dst += dststride;
949cabdff1aSopenharmony_ci    }
950cabdff1aSopenharmony_ci}
951cabdff1aSopenharmony_ci
952cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
953cabdff1aSopenharmony_ci                                       int16_t *src2,
954cabdff1aSopenharmony_ci                                       int height, int denom, int wx0, int wx1,
955cabdff1aSopenharmony_ci                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
956cabdff1aSopenharmony_ci{
957cabdff1aSopenharmony_ci    int x, y;
958cabdff1aSopenharmony_ci    pixel        *src       = (pixel*)_src;
959cabdff1aSopenharmony_ci    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
960cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
961cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
962cabdff1aSopenharmony_ci
963cabdff1aSopenharmony_ci    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
964cabdff1aSopenharmony_ci
965cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
966cabdff1aSopenharmony_ci    int log2Wd = denom + shift - 1;
967cabdff1aSopenharmony_ci
968cabdff1aSopenharmony_ci    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
969cabdff1aSopenharmony_ci    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
970cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
971cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
972cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
973cabdff1aSopenharmony_ci                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
974cabdff1aSopenharmony_ci        src  += srcstride;
975cabdff1aSopenharmony_ci        dst  += dststride;
976cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
977cabdff1aSopenharmony_ci    }
978cabdff1aSopenharmony_ci}
979cabdff1aSopenharmony_ci
980cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst,  ptrdiff_t _dststride,
981cabdff1aSopenharmony_ci                                         uint8_t *_src, ptrdiff_t _srcstride,
982cabdff1aSopenharmony_ci                                         int height, int denom, int wx, int ox,
983cabdff1aSopenharmony_ci                                         intptr_t mx, intptr_t my, int width)
984cabdff1aSopenharmony_ci{
985cabdff1aSopenharmony_ci    int x, y;
986cabdff1aSopenharmony_ci    const int8_t *filter;
987cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;
988cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
989cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
990cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
991cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
992cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
993cabdff1aSopenharmony_ci    int shift = denom + 14 - BIT_DEPTH;
994cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
995cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
996cabdff1aSopenharmony_ci#else
997cabdff1aSopenharmony_ci    int offset = 0;
998cabdff1aSopenharmony_ci#endif
999cabdff1aSopenharmony_ci
1000cabdff1aSopenharmony_ci    src   -= QPEL_EXTRA_BEFORE * srcstride;
1001cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];
1002cabdff1aSopenharmony_ci    for (y = 0; y < height + QPEL_EXTRA; y++) {
1003cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1004cabdff1aSopenharmony_ci            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1005cabdff1aSopenharmony_ci        src += srcstride;
1006cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1007cabdff1aSopenharmony_ci    }
1008cabdff1aSopenharmony_ci
1009cabdff1aSopenharmony_ci    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1010cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];
1011cabdff1aSopenharmony_ci
1012cabdff1aSopenharmony_ci    ox = ox * (1 << (BIT_DEPTH - 8));
1013cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1014cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1015cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1016cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1017cabdff1aSopenharmony_ci        dst += dststride;
1018cabdff1aSopenharmony_ci    }
1019cabdff1aSopenharmony_ci}
1020cabdff1aSopenharmony_ci
1021cabdff1aSopenharmony_cistatic void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1022cabdff1aSopenharmony_ci                                        int16_t *src2,
1023cabdff1aSopenharmony_ci                                        int height, int denom, int wx0, int wx1,
1024cabdff1aSopenharmony_ci                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1025cabdff1aSopenharmony_ci{
1026cabdff1aSopenharmony_ci    int x, y;
1027cabdff1aSopenharmony_ci    const int8_t *filter;
1028cabdff1aSopenharmony_ci    pixel *src = (pixel*)_src;
1029cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1030cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1031cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1032cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1033cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
1034cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
1035cabdff1aSopenharmony_ci    int log2Wd = denom + shift - 1;
1036cabdff1aSopenharmony_ci
1037cabdff1aSopenharmony_ci    src   -= QPEL_EXTRA_BEFORE * srcstride;
1038cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[mx - 1];
1039cabdff1aSopenharmony_ci    for (y = 0; y < height + QPEL_EXTRA; y++) {
1040cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1041cabdff1aSopenharmony_ci            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1042cabdff1aSopenharmony_ci        src += srcstride;
1043cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1044cabdff1aSopenharmony_ci    }
1045cabdff1aSopenharmony_ci
1046cabdff1aSopenharmony_ci    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1047cabdff1aSopenharmony_ci    filter = ff_hevc_qpel_filters[my - 1];
1048cabdff1aSopenharmony_ci
1049cabdff1aSopenharmony_ci    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
1050cabdff1aSopenharmony_ci    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
1051cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1052cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1053cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1054cabdff1aSopenharmony_ci                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1055cabdff1aSopenharmony_ci        tmp  += MAX_PB_SIZE;
1056cabdff1aSopenharmony_ci        dst  += dststride;
1057cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
1058cabdff1aSopenharmony_ci    }
1059cabdff1aSopenharmony_ci}
1060cabdff1aSopenharmony_ci
1061cabdff1aSopenharmony_ci////////////////////////////////////////////////////////////////////////////////
1062cabdff1aSopenharmony_ci//
1063cabdff1aSopenharmony_ci////////////////////////////////////////////////////////////////////////////////
/*
 * EPEL_FILTER(src, stride): weighted sum of four samples of src at indices
 * x - stride, x, x + stride and x + 2 * stride, using filter[0..3] as the
 * coefficients.
 *
 * The expansion reads `x` and `filter` from the scope where it is used, so
 * both must be visible there under exactly those names. The macro arguments
 * are substituted without added parentheses.
 */
1064cabdff1aSopenharmony_ci#define EPEL_FILTER(src, stride)                                               \
1065cabdff1aSopenharmony_ci    (filter[0] * src[x - stride] +                                             \
1066cabdff1aSopenharmony_ci     filter[1] * src[x]          +                                             \
1067cabdff1aSopenharmony_ci     filter[2] * src[x + stride] +                                             \
1068cabdff1aSopenharmony_ci     filter[3] * src[x + 2 * stride])
1069cabdff1aSopenharmony_ci
1070cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_h)(int16_t *dst,
1071cabdff1aSopenharmony_ci                                  uint8_t *_src, ptrdiff_t _srcstride,
1072cabdff1aSopenharmony_ci                                  int height, intptr_t mx, intptr_t my, int width)
1073cabdff1aSopenharmony_ci{
1074cabdff1aSopenharmony_ci    int x, y;
1075cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1076cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1077cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1078cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1079cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1080cabdff1aSopenharmony_ci            dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1081cabdff1aSopenharmony_ci        src += srcstride;
1082cabdff1aSopenharmony_ci        dst += MAX_PB_SIZE;
1083cabdff1aSopenharmony_ci    }
1084cabdff1aSopenharmony_ci}
1085cabdff1aSopenharmony_ci
1086cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_v)(int16_t *dst,
1087cabdff1aSopenharmony_ci                                  uint8_t *_src, ptrdiff_t _srcstride,
1088cabdff1aSopenharmony_ci                                  int height, intptr_t mx, intptr_t my, int width)
1089cabdff1aSopenharmony_ci{
1090cabdff1aSopenharmony_ci    int x, y;
1091cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1092cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1093cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[my - 1];
1094cabdff1aSopenharmony_ci
1095cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1096cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1097cabdff1aSopenharmony_ci            dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
1098cabdff1aSopenharmony_ci        src += srcstride;
1099cabdff1aSopenharmony_ci        dst += MAX_PB_SIZE;
1100cabdff1aSopenharmony_ci    }
1101cabdff1aSopenharmony_ci}
1102cabdff1aSopenharmony_ci
1103cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_hv)(int16_t *dst,
1104cabdff1aSopenharmony_ci                                   uint8_t *_src, ptrdiff_t _srcstride,
1105cabdff1aSopenharmony_ci                                   int height, intptr_t mx, intptr_t my, int width)
1106cabdff1aSopenharmony_ci{
1107cabdff1aSopenharmony_ci    int x, y;
1108cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1109cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1110cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1111cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1112cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
1113cabdff1aSopenharmony_ci
1114cabdff1aSopenharmony_ci    src -= EPEL_EXTRA_BEFORE * srcstride;
1115cabdff1aSopenharmony_ci
1116cabdff1aSopenharmony_ci    for (y = 0; y < height + EPEL_EXTRA; y++) {
1117cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1118cabdff1aSopenharmony_ci            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1119cabdff1aSopenharmony_ci        src += srcstride;
1120cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1121cabdff1aSopenharmony_ci    }
1122cabdff1aSopenharmony_ci
1123cabdff1aSopenharmony_ci    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1124cabdff1aSopenharmony_ci    filter = ff_hevc_epel_filters[my - 1];
1125cabdff1aSopenharmony_ci
1126cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1127cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1128cabdff1aSopenharmony_ci            dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1129cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1130cabdff1aSopenharmony_ci        dst += MAX_PB_SIZE;
1131cabdff1aSopenharmony_ci    }
1132cabdff1aSopenharmony_ci}
1133cabdff1aSopenharmony_ci
1134cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1135cabdff1aSopenharmony_ci                                      int height, intptr_t mx, intptr_t my, int width)
1136cabdff1aSopenharmony_ci{
1137cabdff1aSopenharmony_ci    int x, y;
1138cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1139cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1140cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1141cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1142cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1143cabdff1aSopenharmony_ci    int shift = 14 - BIT_DEPTH;
1144cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1145cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1146cabdff1aSopenharmony_ci#else
1147cabdff1aSopenharmony_ci    int offset = 0;
1148cabdff1aSopenharmony_ci#endif
1149cabdff1aSopenharmony_ci
1150cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1151cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1152cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
1153cabdff1aSopenharmony_ci        src += srcstride;
1154cabdff1aSopenharmony_ci        dst += dststride;
1155cabdff1aSopenharmony_ci    }
1156cabdff1aSopenharmony_ci}
1157cabdff1aSopenharmony_ci
1158cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1159cabdff1aSopenharmony_ci                                     int16_t *src2,
1160cabdff1aSopenharmony_ci                                     int height, intptr_t mx, intptr_t my, int width)
1161cabdff1aSopenharmony_ci{
1162cabdff1aSopenharmony_ci    int x, y;
1163cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1164cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1165cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1166cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1167cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1168cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
1169cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1170cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1171cabdff1aSopenharmony_ci#else
1172cabdff1aSopenharmony_ci    int offset = 0;
1173cabdff1aSopenharmony_ci#endif
1174cabdff1aSopenharmony_ci
1175cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1176cabdff1aSopenharmony_ci        for (x = 0; x < width; x++) {
1177cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1178cabdff1aSopenharmony_ci        }
1179cabdff1aSopenharmony_ci        dst  += dststride;
1180cabdff1aSopenharmony_ci        src  += srcstride;
1181cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
1182cabdff1aSopenharmony_ci    }
1183cabdff1aSopenharmony_ci}
1184cabdff1aSopenharmony_ci
1185cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1186cabdff1aSopenharmony_ci                                      int height, intptr_t mx, intptr_t my, int width)
1187cabdff1aSopenharmony_ci{
1188cabdff1aSopenharmony_ci    int x, y;
1189cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1190cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1191cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1192cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1193cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[my - 1];
1194cabdff1aSopenharmony_ci    int shift = 14 - BIT_DEPTH;
1195cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1196cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1197cabdff1aSopenharmony_ci#else
1198cabdff1aSopenharmony_ci    int offset = 0;
1199cabdff1aSopenharmony_ci#endif
1200cabdff1aSopenharmony_ci
1201cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1202cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1203cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
1204cabdff1aSopenharmony_ci        src += srcstride;
1205cabdff1aSopenharmony_ci        dst += dststride;
1206cabdff1aSopenharmony_ci    }
1207cabdff1aSopenharmony_ci}
1208cabdff1aSopenharmony_ci
1209cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1210cabdff1aSopenharmony_ci                                     int16_t *src2,
1211cabdff1aSopenharmony_ci                                     int height, intptr_t mx, intptr_t my, int width)
1212cabdff1aSopenharmony_ci{
1213cabdff1aSopenharmony_ci    int x, y;
1214cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1215cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1216cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[my - 1];
1217cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1218cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1219cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
1220cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1221cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1222cabdff1aSopenharmony_ci#else
1223cabdff1aSopenharmony_ci    int offset = 0;
1224cabdff1aSopenharmony_ci#endif
1225cabdff1aSopenharmony_ci
1226cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1227cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1228cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1229cabdff1aSopenharmony_ci        dst  += dststride;
1230cabdff1aSopenharmony_ci        src  += srcstride;
1231cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
1232cabdff1aSopenharmony_ci    }
1233cabdff1aSopenharmony_ci}
1234cabdff1aSopenharmony_ci
1235cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1236cabdff1aSopenharmony_ci                                       int height, intptr_t mx, intptr_t my, int width)
1237cabdff1aSopenharmony_ci{
1238cabdff1aSopenharmony_ci    int x, y;
1239cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1240cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1241cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1242cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1243cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1244cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1245cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
1246cabdff1aSopenharmony_ci    int shift = 14 - BIT_DEPTH;
1247cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1248cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1249cabdff1aSopenharmony_ci#else
1250cabdff1aSopenharmony_ci    int offset = 0;
1251cabdff1aSopenharmony_ci#endif
1252cabdff1aSopenharmony_ci
1253cabdff1aSopenharmony_ci    src -= EPEL_EXTRA_BEFORE * srcstride;
1254cabdff1aSopenharmony_ci
1255cabdff1aSopenharmony_ci    for (y = 0; y < height + EPEL_EXTRA; y++) {
1256cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1257cabdff1aSopenharmony_ci            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1258cabdff1aSopenharmony_ci        src += srcstride;
1259cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1260cabdff1aSopenharmony_ci    }
1261cabdff1aSopenharmony_ci
1262cabdff1aSopenharmony_ci    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1263cabdff1aSopenharmony_ci    filter = ff_hevc_epel_filters[my - 1];
1264cabdff1aSopenharmony_ci
1265cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1266cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1267cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1268cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1269cabdff1aSopenharmony_ci        dst += dststride;
1270cabdff1aSopenharmony_ci    }
1271cabdff1aSopenharmony_ci}
1272cabdff1aSopenharmony_ci
1273cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1274cabdff1aSopenharmony_ci                                      int16_t *src2,
1275cabdff1aSopenharmony_ci                                      int height, intptr_t mx, intptr_t my, int width)
1276cabdff1aSopenharmony_ci{
1277cabdff1aSopenharmony_ci    int x, y;
1278cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1279cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1280cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1281cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1282cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1283cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1284cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
1285cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
1286cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1287cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1288cabdff1aSopenharmony_ci#else
1289cabdff1aSopenharmony_ci    int offset = 0;
1290cabdff1aSopenharmony_ci#endif
1291cabdff1aSopenharmony_ci
1292cabdff1aSopenharmony_ci    src -= EPEL_EXTRA_BEFORE * srcstride;
1293cabdff1aSopenharmony_ci
1294cabdff1aSopenharmony_ci    for (y = 0; y < height + EPEL_EXTRA; y++) {
1295cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1296cabdff1aSopenharmony_ci            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1297cabdff1aSopenharmony_ci        src += srcstride;
1298cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1299cabdff1aSopenharmony_ci    }
1300cabdff1aSopenharmony_ci
1301cabdff1aSopenharmony_ci    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1302cabdff1aSopenharmony_ci    filter = ff_hevc_epel_filters[my - 1];
1303cabdff1aSopenharmony_ci
1304cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1305cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1306cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1307cabdff1aSopenharmony_ci        tmp  += MAX_PB_SIZE;
1308cabdff1aSopenharmony_ci        dst  += dststride;
1309cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
1310cabdff1aSopenharmony_ci    }
1311cabdff1aSopenharmony_ci}
1312cabdff1aSopenharmony_ci
1313cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1314cabdff1aSopenharmony_ci                                        int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1315cabdff1aSopenharmony_ci{
1316cabdff1aSopenharmony_ci    int x, y;
1317cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1318cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1319cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1320cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1321cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1322cabdff1aSopenharmony_ci    int shift = denom + 14 - BIT_DEPTH;
1323cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1324cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1325cabdff1aSopenharmony_ci#else
1326cabdff1aSopenharmony_ci    int offset = 0;
1327cabdff1aSopenharmony_ci#endif
1328cabdff1aSopenharmony_ci
1329cabdff1aSopenharmony_ci    ox     = ox * (1 << (BIT_DEPTH - 8));
1330cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1331cabdff1aSopenharmony_ci        for (x = 0; x < width; x++) {
1332cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1333cabdff1aSopenharmony_ci        }
1334cabdff1aSopenharmony_ci        dst += dststride;
1335cabdff1aSopenharmony_ci        src += srcstride;
1336cabdff1aSopenharmony_ci    }
1337cabdff1aSopenharmony_ci}
1338cabdff1aSopenharmony_ci
1339cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1340cabdff1aSopenharmony_ci                                       int16_t *src2,
1341cabdff1aSopenharmony_ci                                       int height, int denom, int wx0, int wx1,
1342cabdff1aSopenharmony_ci                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1343cabdff1aSopenharmony_ci{
1344cabdff1aSopenharmony_ci    int x, y;
1345cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1346cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1347cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1348cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1349cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1350cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
1351cabdff1aSopenharmony_ci    int log2Wd = denom + shift - 1;
1352cabdff1aSopenharmony_ci
1353cabdff1aSopenharmony_ci    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
1354cabdff1aSopenharmony_ci    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
1355cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1356cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1357cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1358cabdff1aSopenharmony_ci                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1359cabdff1aSopenharmony_ci        src  += srcstride;
1360cabdff1aSopenharmony_ci        dst  += dststride;
1361cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
1362cabdff1aSopenharmony_ci    }
1363cabdff1aSopenharmony_ci}
1364cabdff1aSopenharmony_ci
1365cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1366cabdff1aSopenharmony_ci                                        int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1367cabdff1aSopenharmony_ci{
1368cabdff1aSopenharmony_ci    int x, y;
1369cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1370cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1371cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1372cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1373cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[my - 1];
1374cabdff1aSopenharmony_ci    int shift = denom + 14 - BIT_DEPTH;
1375cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1376cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1377cabdff1aSopenharmony_ci#else
1378cabdff1aSopenharmony_ci    int offset = 0;
1379cabdff1aSopenharmony_ci#endif
1380cabdff1aSopenharmony_ci
1381cabdff1aSopenharmony_ci    ox     = ox * (1 << (BIT_DEPTH - 8));
1382cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1383cabdff1aSopenharmony_ci        for (x = 0; x < width; x++) {
1384cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1385cabdff1aSopenharmony_ci        }
1386cabdff1aSopenharmony_ci        dst += dststride;
1387cabdff1aSopenharmony_ci        src += srcstride;
1388cabdff1aSopenharmony_ci    }
1389cabdff1aSopenharmony_ci}
1390cabdff1aSopenharmony_ci
1391cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1392cabdff1aSopenharmony_ci                                       int16_t *src2,
1393cabdff1aSopenharmony_ci                                       int height, int denom, int wx0, int wx1,
1394cabdff1aSopenharmony_ci                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1395cabdff1aSopenharmony_ci{
1396cabdff1aSopenharmony_ci    int x, y;
1397cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1398cabdff1aSopenharmony_ci    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
1399cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[my - 1];
1400cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1401cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1402cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
1403cabdff1aSopenharmony_ci    int log2Wd = denom + shift - 1;
1404cabdff1aSopenharmony_ci
1405cabdff1aSopenharmony_ci    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
1406cabdff1aSopenharmony_ci    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
1407cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1408cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1409cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1410cabdff1aSopenharmony_ci                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1411cabdff1aSopenharmony_ci        src  += srcstride;
1412cabdff1aSopenharmony_ci        dst  += dststride;
1413cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
1414cabdff1aSopenharmony_ci    }
1415cabdff1aSopenharmony_ci}
1416cabdff1aSopenharmony_ci
1417cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1418cabdff1aSopenharmony_ci                                         int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1419cabdff1aSopenharmony_ci{
1420cabdff1aSopenharmony_ci    int x, y;
1421cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1422cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1423cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1424cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1425cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1426cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1427cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
1428cabdff1aSopenharmony_ci    int shift = denom + 14 - BIT_DEPTH;
1429cabdff1aSopenharmony_ci#if BIT_DEPTH < 14
1430cabdff1aSopenharmony_ci    int offset = 1 << (shift - 1);
1431cabdff1aSopenharmony_ci#else
1432cabdff1aSopenharmony_ci    int offset = 0;
1433cabdff1aSopenharmony_ci#endif
1434cabdff1aSopenharmony_ci
1435cabdff1aSopenharmony_ci    src -= EPEL_EXTRA_BEFORE * srcstride;
1436cabdff1aSopenharmony_ci
1437cabdff1aSopenharmony_ci    for (y = 0; y < height + EPEL_EXTRA; y++) {
1438cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1439cabdff1aSopenharmony_ci            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1440cabdff1aSopenharmony_ci        src += srcstride;
1441cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1442cabdff1aSopenharmony_ci    }
1443cabdff1aSopenharmony_ci
1444cabdff1aSopenharmony_ci    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1445cabdff1aSopenharmony_ci    filter = ff_hevc_epel_filters[my - 1];
1446cabdff1aSopenharmony_ci
1447cabdff1aSopenharmony_ci    ox     = ox * (1 << (BIT_DEPTH - 8));
1448cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1449cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1450cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1451cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1452cabdff1aSopenharmony_ci        dst += dststride;
1453cabdff1aSopenharmony_ci    }
1454cabdff1aSopenharmony_ci}
1455cabdff1aSopenharmony_ci
1456cabdff1aSopenharmony_cistatic void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1457cabdff1aSopenharmony_ci                                        int16_t *src2,
1458cabdff1aSopenharmony_ci                                        int height, int denom, int wx0, int wx1,
1459cabdff1aSopenharmony_ci                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1460cabdff1aSopenharmony_ci{
1461cabdff1aSopenharmony_ci    int x, y;
1462cabdff1aSopenharmony_ci    pixel *src = (pixel *)_src;
1463cabdff1aSopenharmony_ci    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1464cabdff1aSopenharmony_ci    pixel *dst          = (pixel *)_dst;
1465cabdff1aSopenharmony_ci    ptrdiff_t dststride = _dststride / sizeof(pixel);
1466cabdff1aSopenharmony_ci    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1467cabdff1aSopenharmony_ci    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1468cabdff1aSopenharmony_ci    int16_t *tmp = tmp_array;
1469cabdff1aSopenharmony_ci    int shift = 14 + 1 - BIT_DEPTH;
1470cabdff1aSopenharmony_ci    int log2Wd = denom + shift - 1;
1471cabdff1aSopenharmony_ci
1472cabdff1aSopenharmony_ci    src -= EPEL_EXTRA_BEFORE * srcstride;
1473cabdff1aSopenharmony_ci
1474cabdff1aSopenharmony_ci    for (y = 0; y < height + EPEL_EXTRA; y++) {
1475cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1476cabdff1aSopenharmony_ci            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1477cabdff1aSopenharmony_ci        src += srcstride;
1478cabdff1aSopenharmony_ci        tmp += MAX_PB_SIZE;
1479cabdff1aSopenharmony_ci    }
1480cabdff1aSopenharmony_ci
1481cabdff1aSopenharmony_ci    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1482cabdff1aSopenharmony_ci    filter = ff_hevc_epel_filters[my - 1];
1483cabdff1aSopenharmony_ci
1484cabdff1aSopenharmony_ci    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
1485cabdff1aSopenharmony_ci    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
1486cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
1487cabdff1aSopenharmony_ci        for (x = 0; x < width; x++)
1488cabdff1aSopenharmony_ci            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1489cabdff1aSopenharmony_ci                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1490cabdff1aSopenharmony_ci        tmp  += MAX_PB_SIZE;
1491cabdff1aSopenharmony_ci        dst  += dststride;
1492cabdff1aSopenharmony_ci        src2 += MAX_PB_SIZE;
1493cabdff1aSopenharmony_ci    }
1494cabdff1aSopenharmony_ci}
1495cabdff1aSopenharmony_ci
/*
 * Sample accessors for the deblocking code. Every macro indexes `pix` and
 * reads `xstride` (and, for the T* set, `ystride`) from the scope where it is
 * used, so those three names must be visible there.
 *
 * P0..P3 are the samples at -1..-4 steps of xstride from pix; Q0..Q3 are the
 * samples at 0..3 steps of xstride from pix.
 */
1496cabdff1aSopenharmony_ci// line zero
1497cabdff1aSopenharmony_ci#define P3 pix[-4 * xstride]
1498cabdff1aSopenharmony_ci#define P2 pix[-3 * xstride]
1499cabdff1aSopenharmony_ci#define P1 pix[-2 * xstride]
1500cabdff1aSopenharmony_ci#define P0 pix[-1 * xstride]
1501cabdff1aSopenharmony_ci#define Q0 pix[0 * xstride]
1502cabdff1aSopenharmony_ci#define Q1 pix[1 * xstride]
1503cabdff1aSopenharmony_ci#define Q2 pix[2 * xstride]
1504cabdff1aSopenharmony_ci#define Q3 pix[3 * xstride]
1505cabdff1aSopenharmony_ci
/*
 * TP0..TP3 / TQ0..TQ3 address the same xstride offsets as P0..P3 / Q0..Q3,
 * displaced by a further 3 * ystride.
 */
1506cabdff1aSopenharmony_ci// line three. used only for deblocking decision
1507cabdff1aSopenharmony_ci#define TP3 pix[-4 * xstride + 3 * ystride]
1508cabdff1aSopenharmony_ci#define TP2 pix[-3 * xstride + 3 * ystride]
1509cabdff1aSopenharmony_ci#define TP1 pix[-2 * xstride + 3 * ystride]
1510cabdff1aSopenharmony_ci#define TP0 pix[-1 * xstride + 3 * ystride]
1511cabdff1aSopenharmony_ci#define TQ0 pix[0  * xstride + 3 * ystride]
1512cabdff1aSopenharmony_ci#define TQ1 pix[1  * xstride + 3 * ystride]
1513cabdff1aSopenharmony_ci#define TQ2 pix[2  * xstride + 3 * ystride]
1514cabdff1aSopenharmony_ci#define TQ3 pix[3  * xstride + 3 * ystride]
1515cabdff1aSopenharmony_ci
1516cabdff1aSopenharmony_cistatic void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
1517cabdff1aSopenharmony_ci                                        ptrdiff_t _xstride, ptrdiff_t _ystride,
1518cabdff1aSopenharmony_ci                                        int beta, int *_tc,
1519cabdff1aSopenharmony_ci                                        uint8_t *_no_p, uint8_t *_no_q)
1520cabdff1aSopenharmony_ci{
1521cabdff1aSopenharmony_ci    int d, j;
1522cabdff1aSopenharmony_ci    pixel *pix        = (pixel *)_pix;
1523cabdff1aSopenharmony_ci    ptrdiff_t xstride = _xstride / sizeof(pixel);
1524cabdff1aSopenharmony_ci    ptrdiff_t ystride = _ystride / sizeof(pixel);
1525cabdff1aSopenharmony_ci
1526cabdff1aSopenharmony_ci    beta <<= BIT_DEPTH - 8;
1527cabdff1aSopenharmony_ci
1528cabdff1aSopenharmony_ci    for (j = 0; j < 2; j++) {
1529cabdff1aSopenharmony_ci        const int dp0  = abs(P2  - 2 * P1  + P0);
1530cabdff1aSopenharmony_ci        const int dq0  = abs(Q2  - 2 * Q1  + Q0);
1531cabdff1aSopenharmony_ci        const int dp3  = abs(TP2 - 2 * TP1 + TP0);
1532cabdff1aSopenharmony_ci        const int dq3  = abs(TQ2 - 2 * TQ1 + TQ0);
1533cabdff1aSopenharmony_ci        const int d0   = dp0 + dq0;
1534cabdff1aSopenharmony_ci        const int d3   = dp3 + dq3;
1535cabdff1aSopenharmony_ci        const int tc   = _tc[j]   << (BIT_DEPTH - 8);
1536cabdff1aSopenharmony_ci        const int no_p = _no_p[j];
1537cabdff1aSopenharmony_ci        const int no_q = _no_q[j];
1538cabdff1aSopenharmony_ci
1539cabdff1aSopenharmony_ci        if (d0 + d3 >= beta) {
1540cabdff1aSopenharmony_ci            pix += 4 * ystride;
1541cabdff1aSopenharmony_ci            continue;
1542cabdff1aSopenharmony_ci        } else {
1543cabdff1aSopenharmony_ci            const int beta_3 = beta >> 3;
1544cabdff1aSopenharmony_ci            const int beta_2 = beta >> 2;
1545cabdff1aSopenharmony_ci            const int tc25   = ((tc * 5 + 1) >> 1);
1546cabdff1aSopenharmony_ci
1547cabdff1aSopenharmony_ci            if (abs(P3  -  P0) + abs(Q3  -  Q0) < beta_3 && abs(P0  -  Q0) < tc25 &&
1548cabdff1aSopenharmony_ci                abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1549cabdff1aSopenharmony_ci                                      (d0 << 1) < beta_2 &&      (d3 << 1) < beta_2) {
1550cabdff1aSopenharmony_ci                // strong filtering
1551cabdff1aSopenharmony_ci                const int tc2 = tc << 1;
1552cabdff1aSopenharmony_ci                for (d = 0; d < 4; d++) {
1553cabdff1aSopenharmony_ci                    const int p3 = P3;
1554cabdff1aSopenharmony_ci                    const int p2 = P2;
1555cabdff1aSopenharmony_ci                    const int p1 = P1;
1556cabdff1aSopenharmony_ci                    const int p0 = P0;
1557cabdff1aSopenharmony_ci                    const int q0 = Q0;
1558cabdff1aSopenharmony_ci                    const int q1 = Q1;
1559cabdff1aSopenharmony_ci                    const int q2 = Q2;
1560cabdff1aSopenharmony_ci                    const int q3 = Q3;
1561cabdff1aSopenharmony_ci                    if (!no_p) {
1562cabdff1aSopenharmony_ci                        P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1563cabdff1aSopenharmony_ci                        P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1564cabdff1aSopenharmony_ci                        P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1565cabdff1aSopenharmony_ci                    }
1566cabdff1aSopenharmony_ci                    if (!no_q) {
1567cabdff1aSopenharmony_ci                        Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1568cabdff1aSopenharmony_ci                        Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1569cabdff1aSopenharmony_ci                        Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1570cabdff1aSopenharmony_ci                    }
1571cabdff1aSopenharmony_ci                    pix += ystride;
1572cabdff1aSopenharmony_ci                }
1573cabdff1aSopenharmony_ci            } else { // normal filtering
1574cabdff1aSopenharmony_ci                int nd_p = 1;
1575cabdff1aSopenharmony_ci                int nd_q = 1;
1576cabdff1aSopenharmony_ci                const int tc_2 = tc >> 1;
1577cabdff1aSopenharmony_ci                if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1578cabdff1aSopenharmony_ci                    nd_p = 2;
1579cabdff1aSopenharmony_ci                if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1580cabdff1aSopenharmony_ci                    nd_q = 2;
1581cabdff1aSopenharmony_ci
1582cabdff1aSopenharmony_ci                for (d = 0; d < 4; d++) {
1583cabdff1aSopenharmony_ci                    const int p2 = P2;
1584cabdff1aSopenharmony_ci                    const int p1 = P1;
1585cabdff1aSopenharmony_ci                    const int p0 = P0;
1586cabdff1aSopenharmony_ci                    const int q0 = Q0;
1587cabdff1aSopenharmony_ci                    const int q1 = Q1;
1588cabdff1aSopenharmony_ci                    const int q2 = Q2;
1589cabdff1aSopenharmony_ci                    int delta0   = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1590cabdff1aSopenharmony_ci                    if (abs(delta0) < 10 * tc) {
1591cabdff1aSopenharmony_ci                        delta0 = av_clip(delta0, -tc, tc);
1592cabdff1aSopenharmony_ci                        if (!no_p)
1593cabdff1aSopenharmony_ci                            P0 = av_clip_pixel(p0 + delta0);
1594cabdff1aSopenharmony_ci                        if (!no_q)
1595cabdff1aSopenharmony_ci                            Q0 = av_clip_pixel(q0 - delta0);
1596cabdff1aSopenharmony_ci                        if (!no_p && nd_p > 1) {
1597cabdff1aSopenharmony_ci                            const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1598cabdff1aSopenharmony_ci                            P1 = av_clip_pixel(p1 + deltap1);
1599cabdff1aSopenharmony_ci                        }
1600cabdff1aSopenharmony_ci                        if (!no_q && nd_q > 1) {
1601cabdff1aSopenharmony_ci                            const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1602cabdff1aSopenharmony_ci                            Q1 = av_clip_pixel(q1 + deltaq1);
1603cabdff1aSopenharmony_ci                        }
1604cabdff1aSopenharmony_ci                    }
1605cabdff1aSopenharmony_ci                    pix += ystride;
1606cabdff1aSopenharmony_ci                }
1607cabdff1aSopenharmony_ci            }
1608cabdff1aSopenharmony_ci        }
1609cabdff1aSopenharmony_ci    }
1610cabdff1aSopenharmony_ci}
1611cabdff1aSopenharmony_ci
1612cabdff1aSopenharmony_cistatic void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1613cabdff1aSopenharmony_ci                                          ptrdiff_t _ystride, int *_tc,
1614cabdff1aSopenharmony_ci                                          uint8_t *_no_p, uint8_t *_no_q)
1615cabdff1aSopenharmony_ci{
1616cabdff1aSopenharmony_ci    int d, j, no_p, no_q;
1617cabdff1aSopenharmony_ci    pixel *pix        = (pixel *)_pix;
1618cabdff1aSopenharmony_ci    ptrdiff_t xstride = _xstride / sizeof(pixel);
1619cabdff1aSopenharmony_ci    ptrdiff_t ystride = _ystride / sizeof(pixel);
1620cabdff1aSopenharmony_ci
1621cabdff1aSopenharmony_ci    for (j = 0; j < 2; j++) {
1622cabdff1aSopenharmony_ci        const int tc = _tc[j] << (BIT_DEPTH - 8);
1623cabdff1aSopenharmony_ci        if (tc <= 0) {
1624cabdff1aSopenharmony_ci            pix += 4 * ystride;
1625cabdff1aSopenharmony_ci            continue;
1626cabdff1aSopenharmony_ci        }
1627cabdff1aSopenharmony_ci        no_p = _no_p[j];
1628cabdff1aSopenharmony_ci        no_q = _no_q[j];
1629cabdff1aSopenharmony_ci
1630cabdff1aSopenharmony_ci        for (d = 0; d < 4; d++) {
1631cabdff1aSopenharmony_ci            int delta0;
1632cabdff1aSopenharmony_ci            const int p1 = P1;
1633cabdff1aSopenharmony_ci            const int p0 = P0;
1634cabdff1aSopenharmony_ci            const int q0 = Q0;
1635cabdff1aSopenharmony_ci            const int q1 = Q1;
1636cabdff1aSopenharmony_ci            delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1637cabdff1aSopenharmony_ci            if (!no_p)
1638cabdff1aSopenharmony_ci                P0 = av_clip_pixel(p0 + delta0);
1639cabdff1aSopenharmony_ci            if (!no_q)
1640cabdff1aSopenharmony_ci                Q0 = av_clip_pixel(q0 - delta0);
1641cabdff1aSopenharmony_ci            pix += ystride;
1642cabdff1aSopenharmony_ci        }
1643cabdff1aSopenharmony_ci    }
1644cabdff1aSopenharmony_ci}
1645cabdff1aSopenharmony_ci
1646cabdff1aSopenharmony_cistatic void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1647cabdff1aSopenharmony_ci                                            int32_t *tc, uint8_t *no_p,
1648cabdff1aSopenharmony_ci                                            uint8_t *no_q)
1649cabdff1aSopenharmony_ci{
1650cabdff1aSopenharmony_ci    FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1651cabdff1aSopenharmony_ci}
1652cabdff1aSopenharmony_ci
1653cabdff1aSopenharmony_cistatic void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1654cabdff1aSopenharmony_ci                                            int32_t *tc, uint8_t *no_p,
1655cabdff1aSopenharmony_ci                                            uint8_t *no_q)
1656cabdff1aSopenharmony_ci{
1657cabdff1aSopenharmony_ci    FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1658cabdff1aSopenharmony_ci}
1659cabdff1aSopenharmony_ci
1660cabdff1aSopenharmony_cistatic void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1661cabdff1aSopenharmony_ci                                          int beta, int32_t *tc, uint8_t *no_p,
1662cabdff1aSopenharmony_ci                                          uint8_t *no_q)
1663cabdff1aSopenharmony_ci{
1664cabdff1aSopenharmony_ci    FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1665cabdff1aSopenharmony_ci                                beta, tc, no_p, no_q);
1666cabdff1aSopenharmony_ci}
1667cabdff1aSopenharmony_ci
1668cabdff1aSopenharmony_cistatic void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1669cabdff1aSopenharmony_ci                                          int beta, int32_t *tc, uint8_t *no_p,
1670cabdff1aSopenharmony_ci                                          uint8_t *no_q)
1671cabdff1aSopenharmony_ci{
1672cabdff1aSopenharmony_ci    FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1673cabdff1aSopenharmony_ci                                beta, tc, no_p, no_q);
1674cabdff1aSopenharmony_ci}
1675cabdff1aSopenharmony_ci
/* The sample accessor macros are only needed by the loop filters above;
 * remove them again so the short names are free for later code. */

// first-line accessors
#undef P0
#undef P1
#undef P2
#undef P3
#undef Q0
#undef Q1
#undef Q2
#undef Q3

// fourth-line accessors
#undef TP0
#undef TP1
#undef TP2
#undef TP3
#undef TQ0
#undef TQ1
#undef TQ2
#undef TQ3
1693