xref: /third_party/ffmpeg/libavcodec/x86/cabac.h (revision cabdff1a)
1/*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#ifndef AVCODEC_X86_CABAC_H
22#define AVCODEC_X86_CABAC_H
23
24#include <stddef.h>
25
26#include "libavcodec/cabac.h"
27#include "libavutil/attributes.h"
28#include "libavutil/macros.h"
29#include "libavutil/x86/asm.h"
30#include "config.h"
31
/*
 * BROKEN_COMPILER is 1 for the toolchains listed below and 0 otherwise.
 * The inline-asm functions further down in this header are only compiled
 * when it is 0 (see the "!BROKEN_COMPILER" guards).
 *
 * Toolchains matched by the condition:
 *   - clang older than 2.10 when __i386 is defined,
 *   - a non-clang compiler that defines __llvm__ and reports itself as
 *     GCC 4.2 with patchlevel 0 or 1,
 *   - a compiler that defines both __INTEL_COMPILER and _MSC_VER.
 */
32#if   (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
33   || (                  !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)\
34   || (defined(__INTEL_COMPILER) && defined(_MSC_VER))
35#       define BROKEN_COMPILER 1
36#else
37#       define BROKEN_COMPILER 0
38#endif
39
40#if HAVE_INLINE_ASM
41
/*
 * Unless the including translation unit has already defined
 * UNCHECKED_BITSTREAM_READER, default it to the logical negation of
 * CONFIG_SAFE_BITSTREAM_READER. The value selects below whether the asm
 * compares the bytestream pointer against its end limit before advancing.
 */
42#ifndef UNCHECKED_BITSTREAM_READER
43#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
44#endif
45
/*
 * END_CHECK(end): asm text fragment guarding the bytestream pointer advance.
 *
 * With UNCHECKED_BITSTREAM_READER non-zero it expands to an empty string,
 * i.e. no check is emitted. Otherwise it compares the register named by
 * FF_REG_c with the operand `end` and jumps forward to local label 1 when
 * the register is greater than or equal to `end` (jge, signed comparison).
 * The expansion site must therefore provide a "1:" label after it.
 *
 * NOTE(review): FF_REG_c is defined outside this file; it is presumably the
 * native-width name of the c register -- confirm in libavutil/x86/asm.h.
 */
46#if UNCHECKED_BITSTREAM_READER
47#define END_CHECK(end) ""
48#else
49#define END_CHECK(end) \
50        "cmp    "end"       , %%"FF_REG_c"                              \n\t"\
51        "jge    1f                                                      \n\t"
52#endif
53
54#ifdef BROKEN_RELOCATIONS
/*
 * BROKEN_RELOCATIONS build: extra asm input operand, appended to the input
 * list of get_cabac_inline_x86, that passes the local `tables` pointer in a
 * general-purpose register. The leading comma is part of the expansion.
 */
55#define TABLES_ARG , "r"(tables)
56
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp),
 * BROKEN_RELOCATIONS flavour. Asm text fragment expanded inside
 * BRANCHLESS_GET_CABAC; it updates range, low and the state register
 * without a conditional jump.
 *
 * Register contents established by BRANCHLESS_GET_CABAC right before the
 * expansion:
 *   range = byte loaded from the table at lps_off
 *   ecx   = previous range minus that byte
 *   tmp   = ecx shifted left by 17
 *   ret   = state byte loaded from statep
 *
 * HAVE_FAST_CMOV variant:
 *   cmp sets the flags for (tmp - low); cmova replaces range with ecx when
 *   tmp is above low (unsigned); sbb turns the borrow into a mask in rcx
 *   (all ones when tmp is below low, zero otherwise); the mask is and-ed
 *   into tmp and xor-ed into retq; finally the masked tmp is subtracted
 *   from low.
 *
 * Fallback variant (no cmov/sbb/64-bit shift):
 *   tmp becomes the sign mask of (tmp - low) via sar $31; sub/and/add leave
 *   range unchanged when the mask is set and replace it with ecx otherwise;
 *   (ecx << 17) & mask is subtracted from low; the mask is xor-ed into ret,
 *   which is then sign-extended into retq.
 *
 * Both variants overwrite ecx/rcx.
 */
57#if HAVE_FAST_CMOV
58#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
59        "cmp    "low"       , "tmp"                        \n\t"\
60        "cmova  %%ecx       , "range"                      \n\t"\
61        "sbb    %%rcx       , %%rcx                        \n\t"\
62        "and    %%ecx       , "tmp"                        \n\t"\
63        "xor    %%rcx       , "retq"                       \n\t"\
64        "sub    "tmp"       , "low"                        \n\t"
65#else /* HAVE_FAST_CMOV */
66#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
67/* P4 Prescott has crappy cmov,sbb,64-bit shift so avoid them */ \
68        "sub    "low"       , "tmp"                        \n\t"\
69        "sar    $31         , "tmp"                        \n\t"\
70        "sub    %%ecx       , "range"                      \n\t"\
71        "and    "tmp"       , "range"                      \n\t"\
72        "add    %%ecx       , "range"                      \n\t"\
73        "shl    $17         , %%ecx                        \n\t"\
74        "and    "tmp"       , %%ecx                        \n\t"\
75        "sub    %%ecx       , "low"                        \n\t"\
76        "xor    "tmp"       , "ret"                        \n\t"\
77        "movslq "ret"       , "retq"                       \n\t"
78#endif /* HAVE_FAST_CMOV */
79
/*
 * BRANCHLESS_GET_CABAC, BROKEN_RELOCATIONS flavour: the lookup tables are
 * addressed relative to a base register (`tables`) instead of by symbol.
 *
 * All arguments are asm operand strings (see get_cabac_inline_x86 for the
 * values actually passed):
 *   ret, retq       32-bit / 64-bit name of the register receiving the state
 *   statep          memory operand of the state byte (read, then rewritten)
 *   low, lowword    32-bit / 16-bit name of the register holding low
 *   range, rangeq   32-bit / 64-bit name of the register holding range
 *   tmp, tmpbyte    32-bit / 8-bit name of a scratch register
 *   byte            memory operand holding the bytestream pointer
 *   end             operand the bytestream pointer is compared against
 *   norm_off, lps_off, mlps_off   offsets of the sub-tables inside `tables`
 *   tables          register holding the table base address
 * The c register (ecx/rcx/cl and FF_REG_c) is used as scratch throughout.
 *
 * Sequence:
 *   1. ret = *statep; range = tables[lps_off + ret + 2 * (range & 0xC0)];
 *      tmp = old range - new range, copied to ecx, then tmp <<= 17.
 *   2. BRANCHLESS_GET_CABAC_UPDATE selects the final range, adjusts low and
 *      conditionally flips ret.
 *   3. range and low are both shifted left by tables[norm_off + range];
 *      *statep = tables[mlps_off + 128 + ret].
 *   4. If the low 16 bits of low are non-zero, jump to label 2 (done).
 *   5. Otherwise refill: load the bytestream pointer into FF_REG_c, run
 *      END_CHECK, add 2 to the stored pointer (skipped when END_CHECK jumps
 *      to label 1), read 16 bits through FF_REG_c, byte-swap and shift so
 *      that tmp = 2 * (16-bit value read big-endian), subtract 0xFFFF,
 *      shift left by 7 - tables[norm_off + ((low ^ (low - 1)) >> 15)] and
 *      add the result to low.
 */
80#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
81        "movzbl "statep"    , "ret"                                     \n\t"\
82        "mov    "range"     , "tmp"                                     \n\t"\
83        "and    $0xC0       , "range"                                   \n\t"\
84        "lea    ("ret", "range", 2), %%ecx                              \n\t"\
85        "movzbl "lps_off"("tables", %%rcx), "range"                     \n\t"\
86        "sub    "range"     , "tmp"                                     \n\t"\
87        "mov    "tmp"       , %%ecx                                     \n\t"\
88        "shl    $17         , "tmp"                                     \n\t"\
89        BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp)              \
90        "movzbl "norm_off"("tables", "rangeq"), %%ecx                   \n\t"\
91        "shl    %%cl        , "range"                                   \n\t"\
92        "movzbl "mlps_off"+128("tables", "retq"), "tmp"                 \n\t"\
93        "shl    %%cl        , "low"                                     \n\t"\
94        "mov    "tmpbyte"   , "statep"                                  \n\t"\
95        "test   "lowword"   , "lowword"                                 \n\t"\
96        "jnz    2f                                                      \n\t"\
97        "mov    "byte"      , %%"FF_REG_c"                              \n\t"\
98        END_CHECK(end)\
99        "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\
100        "1:                                                             \n\t"\
101        "movzwl (%%"FF_REG_c") , "tmp"                                  \n\t"\
102        "lea    -1("low")   , %%ecx                                     \n\t"\
103        "xor    "low"       , %%ecx                                     \n\t"\
104        "shr    $15         , %%ecx                                     \n\t"\
105        "bswap  "tmp"                                                   \n\t"\
106        "shr    $15         , "tmp"                                     \n\t"\
107        "movzbl "norm_off"("tables", %%rcx), %%ecx                      \n\t"\
108        "sub    $0xFFFF     , "tmp"                                     \n\t"\
109        "neg    %%ecx                                                   \n\t"\
110        "add    $7          , %%ecx                                     \n\t"\
111        "shl    %%cl        , "tmp"                                     \n\t"\
112        "add    "tmp"       , "low"                                     \n\t"\
113        "2:                                                             \n\t"
114
115#else /* BROKEN_RELOCATIONS */
/*
 * Build without BROKEN_RELOCATIONS: the asm text refers to
 * ff_h264_cabac_tables by symbol through MANGLE(), so TABLES_ARG only
 * forwards the array through NAMED_CONSTRAINTS_ARRAY_ADD (defined outside
 * this file). RIP_ARG expands to nothing; it is not referenced in the part
 * of the file visible here.
 */
116#define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables)
117#define RIP_ARG
118
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp), flavour used without
 * BROKEN_RELOCATIONS. Asm text fragment expanded inside
 * BRANCHLESS_GET_CABAC; it updates range, low and the state register
 * without a conditional jump, using 32-bit registers only.
 *
 * On entry tmp holds the previous range minus the table byte now in range.
 * Both variants start by copying tmp to ecx and shifting tmp left by 17.
 *
 * HAVE_FAST_CMOV variant:
 *   cmp sets the flags for (tmp - low); cmova replaces range with ecx when
 *   tmp is above low (unsigned); sbb turns the borrow into a mask in ecx
 *   (all ones when tmp is below low, zero otherwise); the mask is and-ed
 *   into tmp and xor-ed into ret; finally the masked tmp is subtracted
 *   from low.
 *
 * Fallback variant:
 *   tmp becomes the sign mask of (tmp - low) via sar $31 (lps_mask);
 *   sub/and/add leave range unchanged when the mask is set and replace it
 *   with ecx otherwise; (ecx << 17) & mask is subtracted from low and the
 *   mask is xor-ed into ret.
 *
 * Both variants overwrite ecx.
 */
119#if HAVE_FAST_CMOV
120#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
121        "mov    "tmp"       , %%ecx     \n\t"\
122        "shl    $17         , "tmp"     \n\t"\
123        "cmp    "low"       , "tmp"     \n\t"\
124        "cmova  %%ecx       , "range"   \n\t"\
125        "sbb    %%ecx       , %%ecx     \n\t"\
126        "and    %%ecx       , "tmp"     \n\t"\
127        "xor    %%ecx       , "ret"     \n\t"\
128        "sub    "tmp"       , "low"     \n\t"
129#else /* HAVE_FAST_CMOV */
130#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
131        "mov    "tmp"       , %%ecx     \n\t"\
132        "shl    $17         , "tmp"     \n\t"\
133        "sub    "low"       , "tmp"     \n\t"\
134        "sar    $31         , "tmp"     \n\t" /*lps_mask*/\
135        "sub    %%ecx       , "range"   \n\t" /*RangeLPS - range*/\
136        "and    "tmp"       , "range"   \n\t" /*(RangeLPS - range)&lps_mask*/\
137        "add    %%ecx       , "range"   \n\t" /*new range*/\
138        "shl    $17         , %%ecx     \n\t"\
139        "and    "tmp"       , %%ecx     \n\t"\
140        "sub    %%ecx       , "low"     \n\t"\
141        "xor    "tmp"       , "ret"     \n\t"
142#endif /* HAVE_FAST_CMOV */
143
/*
 * BRANCHLESS_GET_CABAC, flavour used without BROKEN_RELOCATIONS: the lookup
 * tables are addressed by symbol, MANGLE(ff_h264_cabac_tables) plus an
 * offset, with 32-bit index registers.
 *
 * The parameter list is the same as in the BROKEN_RELOCATIONS flavour so
 * that one call site serves both; retq, rangeq and tables are accepted but
 * not used in this expansion. All arguments are asm operand strings:
 *   ret             32-bit name of the register receiving the state
 *   statep          memory operand of the state byte (read, then rewritten)
 *   low, lowword    32-bit / 16-bit name of the register holding low
 *   range           32-bit name of the register holding range
 *   tmp, tmpbyte    32-bit / 8-bit name of a scratch register
 *   byte            memory operand holding the bytestream pointer
 *   end             operand the bytestream pointer is compared against
 *   norm_off, lps_off, mlps_off   offsets of the sub-tables
 * The c register (ecx/cl and FF_REG_c) is used as scratch throughout.
 *
 * Sequence:
 *   1. ret = *statep; range = tables[lps_off + ret + 2 * (range & 0xC0)];
 *      tmp = old range - new range.
 *   2. BRANCHLESS_GET_CABAC_UPDATE selects the final range, adjusts low and
 *      conditionally flips ret.
 *   3. range and low are both shifted left by tables[norm_off + range];
 *      *statep = tables[mlps_off + 128 + ret].
 *   4. If the low 16 bits of low are non-zero, jump to label 2 (done).
 *   5. Otherwise refill: load the bytestream pointer into FF_REG_c, run
 *      END_CHECK, add 2 to the stored pointer (skipped when END_CHECK jumps
 *      to label 1), read 16 bits through FF_REG_c, byte-swap and shift so
 *      that tmp = 2 * (16-bit value read big-endian), subtract 0xFFFF,
 *      shift left by 7 - tables[norm_off + ((low ^ (low - 1)) >> 15)] and
 *      add the result to low.
 */
144#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
145        "movzbl "statep"    , "ret"                                     \n\t"\
146        "mov    "range"     , "tmp"                                     \n\t"\
147        "and    $0xC0       , "range"                                   \n\t"\
148        "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
149        "sub    "range"     , "tmp"                                     \n\t"\
150        BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)                    \
151        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx    \n\t"\
152        "shl    %%cl        , "range"                                   \n\t"\
153        "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp"  \n\t"\
154        "shl    %%cl        , "low"                                     \n\t"\
155        "mov    "tmpbyte"   , "statep"                                  \n\t"\
156        "test   "lowword"   , "lowword"                                 \n\t"\
157        " jnz   2f                                                      \n\t"\
158        "mov    "byte"      , %%"FF_REG_c"                              \n\t"\
159        END_CHECK(end)\
160        "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\
161        "1:                                                             \n\t"\
162        "movzwl (%%"FF_REG_c") , "tmp"                                  \n\t"\
163        "lea    -1("low")   , %%ecx                                     \n\t"\
164        "xor    "low"       , %%ecx                                     \n\t"\
165        "shr    $15         , %%ecx                                     \n\t"\
166        "bswap  "tmp"                                                   \n\t"\
167        "shr    $15         , "tmp"                                     \n\t"\
168        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
169        "sub    $0xFFFF     , "tmp"                                     \n\t"\
170        "neg    %%ecx                                                   \n\t"\
171        "add    $7          , %%ecx                                     \n\t"\
172        "shl    %%cl        , "tmp"                                     \n\t"\
173        "add    "tmp"       , "low"                                     \n\t"\
174        "2:                                                             \n\t"
175
176#endif /* BROKEN_RELOCATIONS */
177
/*
 * x86 inline-asm implementation installed as get_cabac_inline. Only built
 * when HAVE_7REGS is set and BROKEN_COMPILER is 0.
 *
 * Runs BRANCHLESS_GET_CABAC on the context and the given state byte:
 *   - *state is read and rewritten in place,
 *   - c->low and c->range are updated (tied input/output operands),
 *   - the bytestream pointer stored in *c may be advanced by 2 when the
 *     low 16 bits of low become zero.
 *
 * @param c      CABAC context; low, range, bytestream and bytestream_end
 *               are accessed
 * @param state  pointer to the state byte to use and update
 * @return the least significant bit of the register that held the state
 *         value after the update step (bit & 1)
 *
 * The function is av_noinline when _WIN32 and __clang__ are defined and
 * _WIN64 is not; it is av_always_inline everywhere else.
 */
178#if HAVE_7REGS && !BROKEN_COMPILER
179#define get_cabac_inline get_cabac_inline_x86
180static
181#if defined(_WIN32) && !defined(_WIN64) && defined(__clang__)
182av_noinline
183#else
184av_always_inline
185#endif
186int get_cabac_inline_x86(CABACContext *c, uint8_t *const state)
187{
188    int bit, tmp;
189#ifdef BROKEN_RELOCATIONS
190    void *tables;
191
    /* Materialise the address of ff_h264_cabac_tables in a register; it is
     * handed to the main asm statement through TABLES_ARG as operand %8. */
192    __asm__ volatile(
193        "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
194        : "=&r"(tables)
195        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
196    );
197#endif
198
    /*
     * Operand map:
     *   %0 bit       %1 c->low     %2 c->range   %3 tmp
     *   %4 state     %5 c
     *   %6 offsetof(CABACContext, bytestream)
     *   %7 offsetof(CABACContext, bytestream_end)
     *   %8 table base register (BROKEN_RELOCATIONS builds; the macro does
     *      not use its `tables` argument otherwise)
     * The trailing "1"/"2" inputs preload outputs %1/%2 with the current
     * c->low and c->range. tmp uses the "q" constraint because the macro
     * addresses its low byte as %b3. "memory" is clobbered because *state
     * and the stored bytestream pointer are written through memory operands
     * that are not listed as outputs.
     */
199    __asm__ volatile(
200        BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
201                             "%2", "%q2", "%3", "%b3",
202                             "%c6(%5)", "%c7(%5)",
203                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
204                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
205                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
206                             "%8")
207        : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
208        : "r"(state), "r"(c),
209          "i"(offsetof(CABACContext, bytestream)),
210          "i"(offsetof(CABACContext, bytestream_end))
211          TABLES_ARG
212          ,"1"(c->low), "2"(c->range)
213        : "%"FF_REG_c, "memory"
214    );
215    return bit & 1;
216}
217#endif /* HAVE_7REGS && !BROKEN_COMPILER */
218
219#if !BROKEN_COMPILER
/*
 * x86 inline-asm implementation installed as get_cabac_bypass_sign.
 *
 * Computes low' = 2 * c->low - (c->range << 17). If low' is negative,
 * (c->range << 17) is added back and val is negated; otherwise val is left
 * unchanged. The negation is done without a branch as (val ^ mask) - mask,
 * where mask is the sign of low' produced by cdq (all ones or zero).
 *
 * If the low 16 bits of the resulting low are zero, a refill is performed:
 * 16 bits are read at the bytestream pointer, byte-swapped and shifted to
 * 2 * (big-endian 16-bit value), and that value minus 0xFFFF is added to
 * low. With UNCHECKED_BITSTREAM_READER the pointer is always advanced by 2;
 * otherwise it is advanced only when it is below bytestream_end (signed
 * compare). The final low is stored back to c->low.
 *
 * Operand map: %0 val (pinned to ecx by "+c"), %1 tmp, %2 c,
 * %3/%4/%5/%6 = offsets of low / bytestream / bytestream_end / range in
 * CABACContext. eax and edx are used as scratch and listed as clobbers;
 * "memory" is clobbered because *c is read and written directly.
 *
 * @param c    CABAC context; low and bytestream are updated
 * @param val  value whose sign is conditionally flipped
 * @return val or -val, as described above
 */
220#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
221static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
222{
223    x86_reg tmp;
224    __asm__ volatile(
225        "movl        %c6(%2), %k1       \n\t"
226        "movl        %c3(%2), %%eax     \n\t"
227        "shl             $17, %k1       \n\t"
228        "add           %%eax, %%eax     \n\t"
229        "sub             %k1, %%eax     \n\t"
230        "cdq                            \n\t"
231        "and           %%edx, %k1       \n\t"
232        "add             %k1, %%eax     \n\t"
233        "xor           %%edx, %%ecx     \n\t"
234        "sub           %%edx, %%ecx     \n\t"
235        "test           %%ax, %%ax      \n\t"
236        "jnz              1f            \n\t"
237        "mov         %c4(%2), %1        \n\t"
238        "subl        $0xFFFF, %%eax     \n\t"
239        "movzwl         (%1), %%edx     \n\t"
240        "bswap         %%edx            \n\t"
241        "shrl            $15, %%edx     \n\t"
242#if UNCHECKED_BITSTREAM_READER
243        "add              $2, %1        \n\t"
244        "addl          %%edx, %%eax     \n\t"
245        "mov              %1, %c4(%2)   \n\t"
246#else
247        "addl          %%edx, %%eax     \n\t"
248        "cmp         %c5(%2), %1        \n\t"
249        "jge              1f            \n\t"
250        "add"FF_OPSIZE"   $2, %c4(%2)   \n\t"
251#endif
252        "1:                             \n\t"
253        "movl          %%eax, %c3(%2)   \n\t"
254
255        : "+c"(val), "=&r"(tmp)
256        : "r"(c),
257          "i"(offsetof(CABACContext, low)),
258          "i"(offsetof(CABACContext, bytestream)),
259          "i"(offsetof(CABACContext, bytestream_end)),
260          "i"(offsetof(CABACContext, range))
261        : "%eax", "%edx", "memory"
262    );
263    return val;
264}
265
/*
 * x86 inline-asm implementation installed as get_cabac_bypass.
 *
 * Computes low' = 2 * c->low - (c->range << 17). cdq turns the sign of low'
 * into a mask in edx (all ones or zero); when low' is negative,
 * (c->range << 17) is added back. The result is mask + 1, i.e. 0 when low'
 * was negative and 1 otherwise.
 *
 * If the low 16 bits of the resulting low are zero, a refill is performed:
 * 16 bits are read at the bytestream pointer, byte-swapped and shifted to
 * 2 * (big-endian 16-bit value), and that value minus 0xFFFF is added to
 * low. The stored bytestream pointer is advanced by 2 only when it is below
 * bytestream_end (signed compare); this comparison is emitted
 * unconditionally here, it is not gated on UNCHECKED_BITSTREAM_READER.
 * The final low is stored back to c->low.
 *
 * Operand map: %0 res (pinned to edx by "=&d"), %1 tmp, %2 c,
 * %3/%4/%5/%6 = offsets of low / bytestream / bytestream_end / range in
 * CABACContext. eax and ecx are used as scratch and listed as clobbers;
 * "memory" is clobbered because *c is read and written directly.
 *
 * @param c  CABAC context; low and bytestream are updated
 * @return 0 or 1, as described above
 */
266#define get_cabac_bypass get_cabac_bypass_x86
267static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
268{
269    x86_reg tmp;
270    int res;
271    __asm__ volatile(
272        "movl        %c6(%2), %k1       \n\t"
273        "movl        %c3(%2), %%eax     \n\t"
274        "shl             $17, %k1       \n\t"
275        "add           %%eax, %%eax     \n\t"
276        "sub             %k1, %%eax     \n\t"
277        "cdq                            \n\t"
278        "and           %%edx, %k1       \n\t"
279        "add             %k1, %%eax     \n\t"
280        "inc           %%edx            \n\t"
281        "test           %%ax, %%ax      \n\t"
282        "jnz              1f            \n\t"
283        "mov         %c4(%2), %1        \n\t"
284        "subl        $0xFFFF, %%eax     \n\t"
285        "movzwl         (%1), %%ecx     \n\t"
286        "bswap         %%ecx            \n\t"
287        "shrl            $15, %%ecx     \n\t"
288        "addl          %%ecx, %%eax     \n\t"
289        "cmp         %c5(%2), %1        \n\t"
290        "jge              1f            \n\t"
291        "add"FF_OPSIZE"   $2, %c4(%2)   \n\t"
292        "1:                             \n\t"
293        "movl          %%eax, %c3(%2)   \n\t"
294
295        : "=&d"(res), "=&r"(tmp)
296        : "r"(c),
297          "i"(offsetof(CABACContext, low)),
298          "i"(offsetof(CABACContext, bytestream)),
299          "i"(offsetof(CABACContext, bytestream_end)),
300          "i"(offsetof(CABACContext, range))
301        : "%eax", "%ecx", "memory"
302    );
303    return res;
304}
305#endif /* !BROKEN_COMPILER */
306
307#endif /* HAVE_INLINE_ASM */
308#endif /* AVCODEC_X86_CABAC_H */
309