1/*
2 * Loongson  optimized cabac
3 *
4 * Copyright (c) 2020 Loongson Technology Corporation Limited
5 * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
6 *                Gu Xiwei(guxiwei-hf@loongson.cn)
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#ifndef AVCODEC_LOONGARCH_CABAC_H
26#define AVCODEC_LOONGARCH_CABAC_H
27
28#include "libavcodec/cabac.h"
29#include "config.h"
30
/*
 * Inline-asm template: decode one CABAC bit using the state byte at [state],
 * then refill [c_low] from [c_bytestream] WITHOUT comparing the read pointer
 * against an end pointer (used when UNCHECKED_BITSTREAM_READER is non-zero).
 *
 * The macro expands to adjacent string literals only; every %[name] operand
 * referenced below must be supplied by the enclosing __asm__ statement.
 * On exit [bit] is 0 or 1, the byte at [state] has been overwritten with the
 * byte read at tables + bit + mlps_off, and [c_range], [c_low] and
 * [c_bytestream] have been updated. [tmp0], [tmp1], [tmp2] are scratch.
 */
31#define GET_CABAC_LOONGARCH_UNCBSR                                      \
    /* bit = *state; tmp0 = tables + ((c_range & 0xC0) << 1) + bit */   \
32    "ld.bu        %[bit],        %[state],       0x0           \n\t"    \
33    "andi         %[tmp0],       %[c_range],     0xC0          \n\t"    \
34    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
35    "add.d        %[tmp0],       %[tmp0],        %[tables]     \n\t"    \
36    "add.d        %[tmp0],       %[tmp0],        %[bit]        \n\t"    \
37    /* tmp1: RangeLPS */                                                \
38    "ld.bu        %[tmp1],       %[tmp0],        %[lps_off]    \n\t"    \
39                                                                        \
    /* c_range -= RangeLPS; tmp0 = c_range << 17 */                     \
40    "sub.d        %[c_range],    %[c_range],     %[tmp1]       \n\t"    \
41    "slli.d       %[tmp0],       %[c_range],     0x11          \n\t"    \
    /* signed compare: skip the next three insns when tmp0 >= c_low */  \
42    "bge          %[tmp0],       %[c_low],       1f            \n\t"    \
    /* else: c_range = RangeLPS, bit = ~bit, c_low -= tmp0 */           \
43    "move         %[c_range],    %[tmp1]                       \n\t"    \
44    "nor          %[bit],        %[bit],         %[bit]        \n\t"    \
45    "sub.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
46                                                                        \
47    "1:                                                        \n\t"    \
48    /* tmp1: new *state, read at tables + bit + mlps_off */             \
49    "add.d        %[tmp0],       %[tables],      %[bit]        \n\t"    \
50    "ld.bu        %[tmp1],       %[tmp0],        %[mlps_off]   \n\t"    \
51    /* tmp2: shift count, read at tables + c_range + norm_off */        \
52    "add.d        %[tmp0],       %[tables],      %[c_range]    \n\t"    \
53    "ld.bu        %[tmp2],       %[tmp0],        %[norm_off]   \n\t"    \
54                                                                        \
    /* bit &= 1; store new state; shift c_range and c_low by tmp2 */    \
55    "andi         %[bit],        %[bit],         0x01          \n\t"    \
56    "st.b         %[tmp1],       %[state],       0x0           \n\t"    \
57    "sll.d        %[c_range],    %[c_range],     %[tmp2]       \n\t"    \
58    "sll.d        %[c_low],      %[c_low],       %[tmp2]       \n\t"    \
59                                                                        \
    /* no refill needed while (c_low & cabac_mask) != 0 */              \
60    "and          %[tmp1],       %[c_low],       %[cabac_mask] \n\t"    \
61    "bnez         %[tmp1],       1f                            \n\t"    \
    /* refill: load 16 bits, swap the two bytes (revb.2h), then      */ \
    /* c_low += ((swapped << 1) - cabac_mask) << (ctz(c_low) - 16)   */ \
62    "ld.hu        %[tmp1],       %[c_bytestream], 0x0          \n\t"    \
63    "ctz.d        %[tmp0],       %[c_low]                      \n\t"    \
64    "addi.d       %[tmp2],       %[tmp0],        -16           \n\t"    \
65    "revb.2h      %[tmp0],       %[tmp1]                       \n\t"    \
66    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
67    "sub.d        %[tmp0],       %[tmp0],        %[cabac_mask] \n\t"    \
68    "sll.d        %[tmp0],       %[tmp0],        %[tmp2]       \n\t"    \
69    "add.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
    /* advance the read pointer by two bytes, unconditionally */        \
70    "addi.d       %[c_bytestream], %[c_bytestream],     0x02   \n\t"    \
71    "1:                                                        \n\t"    \
72
/*
 * Inline-asm template: decode one CABAC bit using the state byte at [state],
 * then refill [c_low] from [c_bytestream]. After a refill the read pointer is
 * advanced by two bytes only if it is below [c_bytestream_end] (used when
 * UNCHECKED_BITSTREAM_READER is zero).
 *
 * The macro expands to adjacent string literals only; every %[name] operand
 * referenced below must be supplied by the enclosing __asm__ statement.
 * On exit [bit] is 0 or 1, the byte at [state] has been overwritten with the
 * byte read at tables + bit + mlps_off, and [c_range], [c_low] and
 * [c_bytestream] have been updated. [tmp0], [tmp1], [tmp2] are scratch.
 */
73#define GET_CABAC_LOONGARCH                                             \
    /* bit = *state; tmp0 = tables + ((c_range & 0xC0) << 1) + bit */   \
74    "ld.bu        %[bit],        %[state],       0x0           \n\t"    \
75    "andi         %[tmp0],       %[c_range],     0xC0          \n\t"    \
76    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
77    "add.d        %[tmp0],       %[tmp0],        %[tables]     \n\t"    \
78    "add.d        %[tmp0],       %[tmp0],        %[bit]        \n\t"    \
79    /* tmp1: RangeLPS */                                                \
80    "ld.bu        %[tmp1],       %[tmp0],        %[lps_off]    \n\t"    \
81                                                                        \
    /* c_range -= RangeLPS; tmp0 = c_range << 17 */                     \
82    "sub.d        %[c_range],    %[c_range],     %[tmp1]       \n\t"    \
83    "slli.d       %[tmp0],       %[c_range],     0x11          \n\t"    \
    /* signed compare: skip the next three insns when tmp0 >= c_low */  \
84    "bge          %[tmp0],       %[c_low],       1f            \n\t"    \
    /* else: c_range = RangeLPS, bit = ~bit, c_low -= tmp0 */           \
85    "move         %[c_range],    %[tmp1]                       \n\t"    \
86    "nor          %[bit],        %[bit],         %[bit]        \n\t"    \
87    "sub.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
88                                                                        \
89    "1:                                                        \n\t"    \
90    /* tmp1: new *state, read at tables + bit + mlps_off */             \
91    "add.d        %[tmp0],       %[tables],      %[bit]        \n\t"    \
92    "ld.bu        %[tmp1],       %[tmp0],        %[mlps_off]   \n\t"    \
93    /* tmp2: shift count, read at tables + c_range + norm_off */        \
94    "add.d        %[tmp0],       %[tables],      %[c_range]    \n\t"    \
95    "ld.bu        %[tmp2],       %[tmp0],        %[norm_off]   \n\t"    \
96                                                                        \
    /* bit &= 1; store new state; shift c_range and c_low by tmp2 */    \
97    "andi         %[bit],        %[bit],         0x01          \n\t"    \
98    "st.b         %[tmp1],       %[state],       0x0           \n\t"    \
99    "sll.d        %[c_range],    %[c_range],     %[tmp2]       \n\t"    \
100    "sll.d        %[c_low],      %[c_low],       %[tmp2]       \n\t"    \
101                                                                        \
    /* no refill needed while (c_low & cabac_mask) != 0 */              \
102    "and          %[tmp1],       %[c_low],       %[cabac_mask] \n\t"    \
103    "bnez         %[tmp1],       1f                            \n\t"    \
    /* refill: load 16 bits, swap the two bytes (revb.2h), then      */ \
    /* c_low += ((swapped << 1) - cabac_mask) << (ctz(c_low) - 16).  */ \
    /* The 16-bit load itself is not guarded by the end pointer.     */ \
104    "ld.hu        %[tmp1],       %[c_bytestream], 0x0          \n\t"    \
105    "ctz.d        %[tmp0],       %[c_low]                      \n\t"    \
106    "addi.d       %[tmp2],       %[tmp0],        -16           \n\t"    \
107    "revb.2h      %[tmp0],       %[tmp1]                       \n\t"    \
108    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
109    "sub.d        %[tmp0],       %[tmp0],        %[cabac_mask] \n\t"    \
110    "sll.d        %[tmp0],       %[tmp0],        %[tmp2]       \n\t"    \
111                                                                        \
112    "add.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
113                                                                        \
    /* tmp0 = (c_bytestream < c_bytestream_end) ? 1 : 0, added twice: */ \
    /* the pointer moves by two bytes only while it is below the end  */ \
114    "slt      %[tmp0],  %[c_bytestream],  %[c_bytestream_end]  \n\t"    \
115    "add.d    %[c_bytestream], %[c_bytestream],     %[tmp0]    \n\t"    \
116    "add.d    %[c_bytestream], %[c_bytestream],     %[tmp0]    \n\t"    \
117    "1:                                                        \n\t"    \
118
/* Map the name get_cabac_inline onto the LoongArch implementation below. */
119#define get_cabac_inline get_cabac_inline_loongarch
/**
 * Decode one CABAC bit with the given state byte, using LoongArch inline asm.
 *
 * The instruction sequence comes from GET_CABAC_LOONGARCH_UNCBSR or
 * GET_CABAC_LOONGARCH, depending on UNCHECKED_BITSTREAM_READER.
 *
 * @param c     CABAC context; c->range, c->low and c->bytestream are read and
 *              written back, c->bytestream_end is read in the checked build
 * @param state pointer to the state byte; it is loaded and then overwritten
 *              by the asm
 * @return the decoded bit, 0 or 1
 */
120static av_always_inline
121int get_cabac_inline_loongarch(CABACContext *c, uint8_t * const state)
122{
    /* 64-bit scratch values and the result, all written by the asm */
123    int64_t tmp0, tmp1, tmp2, bit;
124
125    __asm__ volatile (
    /* compile-time choice between the unchecked and the checked template */
126#if UNCHECKED_BITSTREAM_READER
127        GET_CABAC_LOONGARCH_UNCBSR
128#else
129        GET_CABAC_LOONGARCH
130#endif
    /* outputs: "=&r" are write-only early-clobber temporaries, "+&r" are
     * context fields that are both read and updated */
131    : [bit]"=&r"(bit), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
132      [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
133      [c_bytestream]"+&r"(c->bytestream)
    /* inputs: pointers and the mask live in registers; the three table
     * offsets are immediates used as load displacements from [tables] */
134    : [state]"r"(state), [tables]"r"(ff_h264_cabac_tables),
135#if !UNCHECKED_BITSTREAM_READER
136      [c_bytestream_end]"r"(c->bytestream_end),
137#endif
138      [lps_off]"i"(H264_LPS_RANGE_OFFSET),
    /* NOTE(review): the +128 presumably keeps the index in range when the
     * asm has bitwise-inverted [bit] (negative index) - confirm against the
     * table layout */
139      [mlps_off]"i"(H264_MLPS_STATE_OFFSET + 128),
140      [norm_off]"i"(H264_NORM_SHIFT_OFFSET),
141      [cabac_mask]"r"(CABAC_MASK)
    /* the asm stores to *state and loads from the tables and bytestream */
142    : "memory"
143    );
144
    /* the asm masks bit with 0x01, so the narrowing to int is lossless */
145    return bit;
146}
147
/* Map the name get_cabac_bypass onto the LoongArch implementation below. */
148#define get_cabac_bypass get_cabac_bypass_loongarch
/**
 * Decode one bypass CABAC bit, using LoongArch inline asm.
 *
 * Doubles c->low, refills it with 16 bits from c->bytestream when
 * (c->low & CABAC_MASK) is zero, then compares it with c->range << 17.
 *
 * @param c CABAC context; c->low and c->bytestream are updated, c->range is
 *          only read by the instructions
 * @return 1 if c->low was >= (c->range << 17), in which case that amount is
 *         subtracted from c->low; 0 otherwise (c->low left as is)
 */
149static av_always_inline int get_cabac_bypass_loongarch(CABACContext *c)
150{
151    int64_t tmp0, tmp1, tmp2;
    /* read-write asm operand; starts at 0 and is set to 0 or 1 by the asm */
152    int res = 0;
153    __asm__ volatile(
        /* c_low <<= 1; skip the refill unless (c_low & cabac_mask) == 0 */
154        "slli.d     %[c_low],        %[c_low],        0x01                \n\t"
155        "and        %[tmp0],         %[c_low],        %[cabac_mask]       \n\t"
156        "bnez       %[tmp0],         1f                                   \n\t"
        /* refill: fetch 16 bits from the bytestream */
157        "ld.hu      %[tmp1],         %[c_bytestream], 0x0                 \n\t"
158#if UNCHECKED_BITSTREAM_READER
        /* always advance the read pointer by two bytes */
159        "addi.d     %[c_bytestream], %[c_bytestream], 0x02                \n\t"
160#else
        /* advance by 2 * (c_bytestream < c_bytestream_end) */
161        "slt        %[tmp0],         %[c_bytestream], %[c_bytestream_end] \n\t"
162        "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
163        "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
164#endif
        /* c_low += ((byte-swapped halfword) << 1) - cabac_mask */
165        "revb.2h    %[tmp1],         %[tmp1]                              \n\t"
166        "slli.d     %[tmp1],         %[tmp1],         0x01                \n\t"
167        "sub.d      %[tmp1],         %[tmp1],         %[cabac_mask]       \n\t"
168        "add.d      %[c_low],        %[c_low],        %[tmp1]             \n\t"
169        "1:                                                               \n\t"
        /* tmp0 = (c_low < (c_range << 17)); tmp1 = c_low - (c_range << 17) */
170        "slli.d     %[tmp1],         %[c_range],      0x11                \n\t"
171        "slt        %[tmp0],         %[c_low],        %[tmp1]             \n\t"
172        "sub.d      %[tmp1],         %[c_low],        %[tmp1]             \n\t"
        /* branch-free select: res = tmp0 ? 0 : 1 */
173        "masknez    %[tmp2],         %[one],          %[tmp0]             \n\t"
174        "maskeqz    %[res],          %[res],          %[tmp0]             \n\t"
175        "or         %[res],          %[res],          %[tmp2]             \n\t"
        /* branch-free select: c_low = tmp0 ? c_low : tmp1 */
176        "masknez    %[tmp2],         %[tmp1],         %[tmp0]             \n\t"
177        "maskeqz    %[c_low],        %[c_low],        %[tmp0]             \n\t"
178        "or         %[c_low],        %[c_low],        %[tmp2]             \n\t"
        /* outputs: "=&r" scratch temporaries, "+&r" read-write operands */
179        : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
180          [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
181          [c_bytestream]"+&r"(c->bytestream), [res]"+&r"(res)
        /* inputs: the mask and the constant 1 are passed in registers */
182        : [cabac_mask]"r"(CABAC_MASK),
183#if !UNCHECKED_BITSTREAM_READER
184          [c_bytestream_end]"r"(c->bytestream_end),
185#endif
186          [one]"r"(0x01)
187        : "memory"
188    );
189    return res;
190}
191
/* Map the name get_cabac_bypass_sign onto the LoongArch implementation below. */
192#define get_cabac_bypass_sign get_cabac_bypass_sign_loongarch
/**
 * Decode one bypass CABAC bit and use it to choose the sign of val, using
 * LoongArch inline asm.
 *
 * Doubles c->low, refills it with 16 bits from c->bytestream when
 * (c->low & CABAC_MASK) is zero, then compares it with c->range << 17.
 *
 * @param c   CABAC context; c->low and c->bytestream are updated, c->range is
 *            only read by the instructions
 * @param val value whose sign is to be selected
 * @return -val if c->low was below (c->range << 17); otherwise val, and
 *         (c->range << 17) is subtracted from c->low
 */
193static av_always_inline
194int get_cabac_bypass_sign_loongarch(CABACContext *c, int val)
195{
196    int64_t tmp0, tmp1;
    /* read-write asm operand: enters as val, leaves as val or -val */
197    int res = val;
198    __asm__ volatile(
        /* c_low <<= 1; skip the refill unless (c_low & cabac_mask) == 0 */
199        "slli.d     %[c_low],        %[c_low],        0x01                \n\t"
200        "and        %[tmp0],         %[c_low],        %[cabac_mask]       \n\t"
201        "bnez       %[tmp0],         1f                                   \n\t"
        /* refill: fetch 16 bits from the bytestream */
202        "ld.hu      %[tmp1],         %[c_bytestream], 0x0                 \n\t"
203#if UNCHECKED_BITSTREAM_READER
        /* always advance the read pointer by two bytes */
204        "addi.d     %[c_bytestream], %[c_bytestream], 0x02                \n\t"
205#else
        /* advance by 2 * (c_bytestream < c_bytestream_end) */
206        "slt        %[tmp0],         %[c_bytestream], %[c_bytestream_end] \n\t"
207        "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
208        "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
209#endif
        /* c_low += ((byte-swapped halfword) << 1) - cabac_mask */
210        "revb.2h    %[tmp1],         %[tmp1]                              \n\t"
211        "slli.d     %[tmp1],         %[tmp1],         0x01                \n\t"
212        "sub.d      %[tmp1],         %[tmp1],         %[cabac_mask]       \n\t"
213        "add.d      %[c_low],        %[c_low],        %[tmp1]             \n\t"
214        "1:                                                               \n\t"
        /* tmp0 = (c_low < (c_range << 17)); tmp1 = c_low - (c_range << 17) */
215        "slli.d     %[tmp1],         %[c_range],      0x11                \n\t"
216        "slt        %[tmp0],         %[c_low],        %[tmp1]             \n\t"
217        "sub.d      %[tmp1],         %[c_low],        %[tmp1]             \n\t"
        /* branch-free select: c_low = tmp0 ? c_low : tmp1 */
218        "masknez    %[tmp1],         %[tmp1],         %[tmp0]             \n\t"
219        "maskeqz    %[c_low],        %[c_low],        %[tmp0]             \n\t"
220        "or         %[c_low],        %[c_low],        %[tmp1]             \n\t"
        /* branch-free select: res = tmp0 ? (zero - res) : res */
221        "sub.d      %[tmp1],         %[zero],         %[res]              \n\t"
222        "maskeqz    %[tmp1],         %[tmp1],         %[tmp0]             \n\t"
223        "masknez    %[res],          %[res],          %[tmp0]             \n\t"
224        "or         %[res],          %[res],          %[tmp1]             \n\t"
        /* outputs: "=&r" scratch temporaries, "+&r" read-write operands */
225        : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [res]"+&r"(res),
226          [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
227          [c_bytestream]"+&r"(c->bytestream)
        /* inputs: the mask and the constant 0 are passed in registers */
228        : [cabac_mask]"r"(CABAC_MASK),
229#if !UNCHECKED_BITSTREAM_READER
230          [c_bytestream_end]"r"(c->bytestream_end),
231#endif
232          [zero]"r"(0x0)
233        : "memory"
234    );
235
236    return res;
237}
238#endif /* AVCODEC_LOONGARCH_CABAC_H */
239