1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * H.264 / AVC / MPEG-4 part10 codec.
25 * non-SIMD x86-specific optimizations for H.264
26 * @author Michael Niedermayer <michaelni@gmx.at>
27 */
28
29#include <stddef.h>
30
31#include "libavcodec/cabac.h"
32#include "cabac.h"
33
34#if HAVE_INLINE_ASM
35
/*
 * REG64: inline-asm constraint string used by decode_significance_8x8_x86
 * for its last, index, sig_off and last_coeff_ctx_base operands.
 * It is a register ("r") when ARCH_X86_64 is set and a memory operand ("m")
 * otherwise.
 * NOTE(review): presumably "m" is chosen on 32-bit builds to relieve
 * register pressure -- confirm.
 */
36#if ARCH_X86_64
37#define REG64 "r"
38#else
39#define REG64 "m"
40#endif
41
42//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
43//as that would make optimization work hard)
44#if HAVE_7REGS && !BROKEN_COMPILER
/* Route the generic name decode_significance to the x86 asm version below. */
45#define decode_significance decode_significance_x86
/**
 * x86 inline-asm implementation of decode_significance.
 *
 * Walks a context pointer from significant_coeff_ctx_base up to (but not
 * including) significant_coeff_ctx_base + max_coeff - 1. At each position
 * BRANCHLESS_GET_CABAC is expanded on the context byte at that position.
 * If bit 0 of the result is set, BRANCHLESS_GET_CABAC is expanded a second
 * time on the byte last_off further on, the position (offset from
 * significant_coeff_ctx_base) is appended to index[], and bit 0 of the
 * second result ends the scan. If the loop runs to completion, the final
 * position (max_coeff - 1) is appended without expanding the macro for it.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC, TABLES_ARG, MANGLE and
 * NAMED_CONSTRAINTS_ARRAY are defined outside this file; by their names the
 * two macro expansions presumably decode the "significant coefficient" and
 * "last significant coefficient" flags -- confirm against cabac.h.
 *
 * @param c                          CABAC context; c->low and c->range are
 *                                   read-write asm operands and the offsets
 *                                   of bytestream / bytestream_end are
 *                                   handed to BRANCHLESS_GET_CABAC
 * @param max_coeff                  sets the scan bound (see above)
 * @param significant_coeff_ctx_base address of the first context byte
 * @param index                      int array receiving the stored positions
 * @param last_off                   byte offset added to the current context
 *                                   pointer for the second macro expansion
 * @return number of entries written to index[]
 */
46static int decode_significance_x86(CABACContext *c, int max_coeff,
47                                   uint8_t *significant_coeff_ctx_base,
48                                   int *index, x86_reg last_off){
    /* Scan bound: the loop at label 3 repeats while the context pointer is
     * below this address (unsigned compare). */
49    void *end= significant_coeff_ctx_base + max_coeff - 1;
    /* Negated base address (truncated to int); added to the current context
     * pointer in the asm so that the low 32 bits give the position. */
50    int minusstart= -(intptr_t)significant_coeff_ctx_base;
    /* 4 minus the start of index[]; added to the final index pointer and
     * shifted right by 2 it yields the number of ints written. */
51    int minusindex= 4-(intptr_t)index;
52    int bit;
53    x86_reg coeff_count;
54
55#ifdef BROKEN_RELOCATIONS
    /* Load the address of ff_h264_cabac_tables into a register.
     * NOTE(review): 'tables' is not named again in this function; presumably
     * TABLES_ARG passes it to the main asm statement -- confirm. */
56    void *tables;
57
58    __asm__ volatile(
59        "lea   "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
60        : "=&r"(tables)
61        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
62    );
63#endif
64
    /*
     * Operand map for the asm statement below:
     *   %0  coeff_count (scratch given to the macro as %k0/%b0, then the
     *       index pointer, finally the result)
     *   %1  significant_coeff_ctx_base, advanced in place as the current
     *       context pointer
     *   %2  index (memory operand, advanced by 4 per kept entry)
     *   %3  c->low      %4  bit      %5  c->range      %6  c
     *   %7  minusstart  %8  end      %9  minusindex    %10 last_off
     *   %11 offsetof(CABACContext, bytestream)
     *   %12 offsetof(CABACContext, bytestream_end)
     *   %13 referenced by the macro calls; presumably supplied by TABLES_ARG
     */
65    __asm__ volatile(
        /* 3: top of the per-position loop */
66        "3:                                     \n\t"
67
        /* first expansion: context byte at (%1) */
68        BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
69                             "%5", "%q5", "%k0", "%b0",
70                             "%c11(%6)", "%c12(%6)",
71                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
72                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
73                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
74                             "%13")
75
        /* bit 0 clear: skip to the next position; otherwise point %1 at the
         * byte last_off further on for the second expansion */
76        "test $1, %4                            \n\t"
77        " jz 4f                                 \n\t"
78        "add  %10, %1                           \n\t"
79
80        BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
81                             "%5", "%q5", "%k0", "%b0",
82                             "%c11(%6)", "%c12(%6)",
83                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
84                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
85                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
86                             "%13")
87
        /* undo the last_off adjustment, then store the current position
         * (context pointer + minusstart, low 32 bits) at the index pointer */
88        "sub  %10, %1                           \n\t"
89        "mov  %2, %0                            \n\t"
90        "movl %7, %%ecx                         \n\t"
91        "add  %1, %%"FF_REG_c"                  \n\t"
92        "movl %%ecx, (%0)                       \n\t"
93
        /* bit 0 of the second result set: finished, go compute the count */
94        "test $1, %4                            \n\t"
95        " jnz 5f                                \n\t"
96
        /* keep the stored entry: advance the index pointer by one int */
97        "add"FF_OPSIZE"  $4, %2                 \n\t"
98
        /* 4: advance to the next context byte and loop while below 'end' */
99        "4:                                     \n\t"
100        "add  $1, %1                            \n\t"
101        "cmp  %8, %1                            \n\t"
102        " jb 3b                                 \n\t"
        /* loop ran to completion: store the final position as well */
103        "mov  %2, %0                            \n\t"
104        "movl %7, %%ecx                         \n\t"
105        "add  %1, %%"FF_REG_c"                  \n\t"
106        "movl %%ecx, (%0)                       \n\t"
        /* 5: %0 holds the address of the last stored entry;
         * count = (that address + 4 - start of index[]) >> 2 */
107        "5:                                     \n\t"
108        "add  %9, %k0                           \n\t"
109        "shr $2, %k0                            \n\t"
110        : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
111          "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
112        : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
113          "i"(offsetof(CABACContext, bytestream)),
114          "i"(offsetof(CABACContext, bytestream_end))
115          TABLES_ARG
        /* ecx/rcx is used explicitly as scratch above */
116        : "%"FF_REG_c, "memory"
117    );
118    return coeff_count;
119}
120
/* Route the generic name decode_significance_8x8 to the x86 asm version below. */
121#define decode_significance_8x8 decode_significance_8x8_x86
/**
 * x86 inline-asm implementation of decode_significance_8x8.
 *
 * Runs a position counter from 0 to 62. For each position:
 *  - BRANCHLESS_GET_CABAC is expanded on the context byte at
 *    significant_coeff_ctx_base + sig_off[position];
 *  - if bit 0 of the result is set, BRANCHLESS_GET_CABAC is expanded again
 *    on the byte at last_coeff_ctx_base + T[position], where T is the byte
 *    table located H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET bytes into
 *    ff_h264_cabac_tables; the position is then appended to index[] and
 *    bit 0 of this second result ends the scan.
 * If the loop runs to completion, position 63 is appended without expanding
 * the macro for it.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC, TABLES_ARG, MANGLE and
 * NAMED_CONSTRAINTS_ARRAY are defined outside this file; by their names the
 * two macro expansions presumably decode the "significant coefficient" and
 * "last significant coefficient" flags -- confirm against cabac.h.
 *
 * @param c                          CABAC context; c->low and c->range are
 *                                   read-write asm operands and the offsets
 *                                   of bytestream / bytestream_end are
 *                                   handed to BRANCHLESS_GET_CABAC
 * @param significant_coeff_ctx_base base address for the first expansion
 * @param index                      int array receiving the stored positions
 * @param last_coeff_ctx_base        base address for the second expansion
 * @param sig_off                    per-position byte offsets added to
 *                                   significant_coeff_ctx_base
 * @return number of entries written to index[]
 */
122static int decode_significance_8x8_x86(CABACContext *c,
123                                       uint8_t *significant_coeff_ctx_base,
124                                       int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
    /* 4 minus the start of index[]; added to the final index pointer and
     * shifted right by 2 it yields the number of ints written. */
125    int minusindex= 4-(intptr_t)index;
126    int bit;
127    x86_reg coeff_count;
    /* current position counter, starts at 0 */
128    x86_reg last=0;
    /* scratch register: holds either the position or a context address */
129    x86_reg state;
130
131#ifdef BROKEN_RELOCATIONS
    /* Load the address of ff_h264_cabac_tables into a register.
     * NOTE(review): 'tables' is not named again in this function; presumably
     * TABLES_ARG passes it to the main asm statement as %15 -- confirm. */
132    void *tables;
133
134    __asm__ volatile(
135        "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
136        : "=&r"(tables)
137        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
138    );
139#endif
140
    /*
     * Operand map for the asm statement below:
     *   %0  coeff_count (scratch given to the macro as %k0/%b0, also used
     *       for sig_off and the index pointer, finally the result)
     *   %1  last (position counter)
     *   %2  index (advanced by 4 per kept entry)
     *   %3  c->low      %4  bit      %5  c->range      %6  state
     *   %7  c           %8  minusindex
     *   %9  significant_coeff_ctx_base
     *   %10 sig_off     %11 last_coeff_ctx_base
     *   %12 offsetof(CABACContext, bytestream)
     *   %13 offsetof(CABACContext, bytestream_end)
     *   %14 H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET
     *   %15 referenced by the macro calls; presumably supplied by TABLES_ARG
     */
141    __asm__ volatile(
        /* state = position */
142        "mov %1, %6                             \n\t"
        /* 3: top of the per-position loop */
143        "3:                                     \n\t"
144
        /* state = significant_coeff_ctx_base + sig_off[position] */
145        "mov %10, %0                            \n\t"
146        "movzb (%0, %6), %6                     \n\t"
147        "add %9, %6                             \n\t"
148
149        BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
150                             "%5", "%q5", "%k0", "%b0",
151                             "%c12(%7)", "%c13(%7)",
152                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
153                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
154                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
155                             "%15")
156
        /* reload the position; bit 0 clear: skip to the next position */
157        "mov %1, %6                             \n\t"
158        "test $1, %4                            \n\t"
159        " jz 4f                                 \n\t"
160
        /* state = last_coeff_ctx_base + table byte for this position; the
         * table base is register %15 under BROKEN_RELOCATIONS, otherwise the
         * ff_h264_cabac_tables symbol itself */
161#ifdef BROKEN_RELOCATIONS
162        "movzb %c14(%15, %q6), %6\n\t"
163#else
164        "movzb "MANGLE(ff_h264_cabac_tables)"+%c14(%6), %6\n\t"
165#endif
166        "add %11, %6                            \n\t"
167
168        BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
169                             "%5", "%q5", "%k0", "%b0",
170                             "%c12(%7)", "%c13(%7)",
171                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
172                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
173                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
174                             "%15")
175
        /* store the position (low 32 bits) at the current index pointer */
176        "mov %2, %0                             \n\t"
177        "mov %1, %6                             \n\t"
178        "mov %k6, (%0)                          \n\t"
179
        /* bit 0 of the second result set: finished, go compute the count */
180        "test $1, %4                            \n\t"
181        " jnz 5f                                \n\t"
182
        /* keep the stored entry: advance the index pointer by one int */
183        "add"FF_OPSIZE"  $4, %2                 \n\t"
184
        /* 4: increment the position, write it back, loop while below 63 */
185        "4:                                     \n\t"
186        "add $1, %6                             \n\t"
187        "mov %6, %1                             \n\t"
188        "cmp $63, %6                            \n\t"
189        " jb 3b                                 \n\t"
        /* loop ran to completion: store position 63 as well */
190        "mov %2, %0                             \n\t"
191        "mov %k6, (%0)                          \n\t"
        /* 5: %0 holds the address of the last stored entry;
         * count = (that address + 4 - start of index[]) >> 2 */
192        "5:                                     \n\t"
193        "addl %8, %k0                           \n\t"
194        "shr $2, %k0                            \n\t"
195        : "=&q"(coeff_count), "+"REG64(last), "+"REG64(index), "+&r"(c->low),
196          "=&r"(bit), "+&r"(c->range), "=&r"(state)
197        : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
198          REG64(sig_off), REG64(last_coeff_ctx_base),
199          "i"(offsetof(CABACContext, bytestream)),
200          "i"(offsetof(CABACContext, bytestream_end)),
201          "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
202        : "%"FF_REG_c, "memory"
203    );
204    return coeff_count;
205}
206#endif /* HAVE_7REGS && !BROKEN_COMPILER */
207
208#endif /* HAVE_INLINE_ASM */
209