1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3cabdff1aSopenharmony_ci * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4cabdff1aSopenharmony_ci *
5cabdff1aSopenharmony_ci * This file is part of FFmpeg.
6cabdff1aSopenharmony_ci *
7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
11cabdff1aSopenharmony_ci *
12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15cabdff1aSopenharmony_ci * Lesser General Public License for more details.
16cabdff1aSopenharmony_ci *
17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20cabdff1aSopenharmony_ci */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci/**
23cabdff1aSopenharmony_ci * @file
24cabdff1aSopenharmony_ci * H.264 / AVC / MPEG-4 part10 codec.
25cabdff1aSopenharmony_ci * non-SIMD x86-specific optimizations for H.264
26cabdff1aSopenharmony_ci * @author Michael Niedermayer <michaelni@gmx.at>
27cabdff1aSopenharmony_ci */
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_ci#include <stddef.h>
30cabdff1aSopenharmony_ci
31cabdff1aSopenharmony_ci#include "libavcodec/cabac.h"
32cabdff1aSopenharmony_ci#include "cabac.h"
33cabdff1aSopenharmony_ci
34cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM
35cabdff1aSopenharmony_ci
/*
 * REG64: inline-asm constraint string used by decode_significance_8x8_x86
 * for its last, index, sig_off and last_coeff_ctx_base operands.
 * It selects a general register (r) when ARCH_X86_64 is nonzero and a
 * memory operand (m) otherwise.
 * NOTE(review): presumably memory is used on 32-bit builds because there
 * are not enough free registers for all operands -- confirm.
 */
36cabdff1aSopenharmony_ci#if ARCH_X86_64
37cabdff1aSopenharmony_ci#define REG64 "r"
38cabdff1aSopenharmony_ci#else
39cabdff1aSopenharmony_ci#define REG64 "m"
40cabdff1aSopenharmony_ci#endif
41cabdff1aSopenharmony_ci
42cabdff1aSopenharmony_ci//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
43cabdff1aSopenharmony_ci//as that would make optimization work hard)
44cabdff1aSopenharmony_ci#if HAVE_7REGS && !BROKEN_COMPILER
/* Make the name decode_significance resolve to the x86 asm version below. */
45cabdff1aSopenharmony_ci#define decode_significance decode_significance_x86
/**
 * Scan a run of CABAC context bytes and record the positions flagged by
 * the decoded bins, using one hand-written x86 asm loop.
 *
 * The loop walks a byte pointer from significant_coeff_ctx_base upwards.
 * At each position it decodes one bin with the context byte at the pointer.
 * If bit 0 of the result is set, it decodes a second bin with the context
 * byte last_off bytes further on, stores the current position (pointer
 * minus the original base, low 32 bits) through index, and then either
 * stops (second bin set) or advances index by 4 bytes (second bin clear).
 * The loop condition is tested at the bottom, so the body always runs at
 * least once. When the pointer is no longer below end, the position
 * reached is stored through index unconditionally.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC and TABLES_ARG are defined outside
 * this file; the comments below only describe how their operands are
 * wired here, not what the macros expand to.
 *
 * @param c                          CABAC state; c->low and c->range are
 *                                   read and written back by the asm, and
 *                                   the offsets of bytestream and
 *                                   bytestream_end are passed as immediates
 * @param max_coeff                  the scan stops once the pointer reaches
 *                                   significant_coeff_ctx_base + max_coeff - 1
 * @param significant_coeff_ctx_base first context byte used for the first
 *                                   bin of each position
 * @param index                      output array of 32-bit positions
 * @param last_off                   byte distance from the first-bin context
 *                                   to the second-bin context of a position
 * @return number of entries written through index, computed as
 *         (last written slot + 4 - initial index) >> 2
 */
46cabdff1aSopenharmony_cistatic int decode_significance_x86(CABACContext *c, int max_coeff,
47cabdff1aSopenharmony_ci                                   uint8_t *significant_coeff_ctx_base,
48cabdff1aSopenharmony_ci                                   int *index, x86_reg last_off){
    /* Loop bound: the asm compares the running pointer against this address. */
49cabdff1aSopenharmony_ci    void *end= significant_coeff_ctx_base + max_coeff - 1;
    /* Negated base, truncated to int: added to the running pointer to get
     * the position; only the low 32 bits of the sum are stored. */
50cabdff1aSopenharmony_ci    int minusstart= -(intptr_t)significant_coeff_ctx_base;
    /* 4 minus the initial index address: turns the address of the last
     * written slot into a byte count that includes that slot. */
51cabdff1aSopenharmony_ci    int minusindex= 4-(intptr_t)index;
52cabdff1aSopenharmony_ci    int bit;
53cabdff1aSopenharmony_ci    x86_reg coeff_count;
54cabdff1aSopenharmony_ci
55cabdff1aSopenharmony_ci#ifdef BROKEN_RELOCATIONS
56cabdff1aSopenharmony_ci    void *tables;
57cabdff1aSopenharmony_ci
    /* Load the address of ff_h264_cabac_tables into a register with lea.
     * NOTE(review): presumably handed to the main asm through TABLES_ARG
     * (defined elsewhere) -- confirm. */
58cabdff1aSopenharmony_ci    __asm__ volatile(
59cabdff1aSopenharmony_ci        "lea   "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
60cabdff1aSopenharmony_ci        : "=&r"(tables)
61cabdff1aSopenharmony_ci        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
62cabdff1aSopenharmony_ci    );
63cabdff1aSopenharmony_ci#endif
64cabdff1aSopenharmony_ci
    /*
     * Operand map for the asm below:
     *   %0  coeff_count (scratch during the loop, result at the end)
     *   %1  significant_coeff_ctx_base (running context pointer)
     *   %2  index (output cursor, kept in memory)
     *   %3  c->low        %4  bit        %5  c->range
     *   %6  c             %7  minusstart %8  end
     *   %9  minusindex    %10 last_off
     *   %11 offsetof(CABACContext, bytestream)
     *   %12 offsetof(CABACContext, bytestream_end)
     *   %13 whatever TABLES_ARG appends (defined elsewhere)
     * The c register (FF_REG_c) is used as scratch and listed as clobbered.
     */
65cabdff1aSopenharmony_ci    __asm__ volatile(
        /* 3: top of the per-position loop */
66cabdff1aSopenharmony_ci        "3:                                     \n\t"
67cabdff1aSopenharmony_ci
        /* First bin of this position, context byte at (%1).
         * NOTE(review): presumably the macro leaves the decoded bin in %4,
         * since bit 0 of %4 is tested right after -- confirm in cabac.h. */
68cabdff1aSopenharmony_ci        BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
69cabdff1aSopenharmony_ci                             "%5", "%q5", "%k0", "%b0",
70cabdff1aSopenharmony_ci                             "%c11(%6)", "%c12(%6)",
71cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
72cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
73cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
74cabdff1aSopenharmony_ci                             "%13")
75cabdff1aSopenharmony_ci
        /* Bit 0 of %4 clear: nothing to record, go to the loop increment. */
76cabdff1aSopenharmony_ci        "test $1, %4                            \n\t"
77cabdff1aSopenharmony_ci        " jz 4f                                 \n\t"
        /* Temporarily move the pointer by last_off to reach the context
         * byte of the second bin. */
78cabdff1aSopenharmony_ci        "add  %10, %1                           \n\t"
79cabdff1aSopenharmony_ci
        /* Second bin of this position, context byte at (%1). */
80cabdff1aSopenharmony_ci        BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
81cabdff1aSopenharmony_ci                             "%5", "%q5", "%k0", "%b0",
82cabdff1aSopenharmony_ci                             "%c11(%6)", "%c12(%6)",
83cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
84cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
85cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
86cabdff1aSopenharmony_ci                             "%13")
87cabdff1aSopenharmony_ci
        /* Undo the last_off displacement, then store the position
         * (minusstart + pointer, low 32 bits) at the current index slot. */
88cabdff1aSopenharmony_ci        "sub  %10, %1                           \n\t"
89cabdff1aSopenharmony_ci        "mov  %2, %0                            \n\t"
90cabdff1aSopenharmony_ci        "movl %7, %%ecx                         \n\t"
91cabdff1aSopenharmony_ci        "add  %1, %%"FF_REG_c"                  \n\t"
92cabdff1aSopenharmony_ci        "movl %%ecx, (%0)                       \n\t"
93cabdff1aSopenharmony_ci
        /* Second bin set: stop scanning; %0 still holds the slot just
         * written, which label 5 uses to compute the count. */
94cabdff1aSopenharmony_ci        "test $1, %4                            \n\t"
95cabdff1aSopenharmony_ci        " jnz 5f                                \n\t"
96cabdff1aSopenharmony_ci
        /* Otherwise advance the index cursor in memory by 4 bytes. */
97cabdff1aSopenharmony_ci        "add"FF_OPSIZE"  $4, %2                 \n\t"
98cabdff1aSopenharmony_ci
        /* 4: next position; loop while the pointer is below end (unsigned). */
99cabdff1aSopenharmony_ci        "4:                                     \n\t"
100cabdff1aSopenharmony_ci        "add  $1, %1                            \n\t"
101cabdff1aSopenharmony_ci        "cmp  %8, %1                            \n\t"
102cabdff1aSopenharmony_ci        " jb 3b                                 \n\t"
        /* Loop ran out: store the position reached without decoding
         * any further bin. */
103cabdff1aSopenharmony_ci        "mov  %2, %0                            \n\t"
104cabdff1aSopenharmony_ci        "movl %7, %%ecx                         \n\t"
105cabdff1aSopenharmony_ci        "add  %1, %%"FF_REG_c"                  \n\t"
106cabdff1aSopenharmony_ci        "movl %%ecx, (%0)                       \n\t"
        /* 5: count = (address of last written slot + minusindex) >> 2,
         * computed on the low 32 bits of %0. */
107cabdff1aSopenharmony_ci        "5:                                     \n\t"
108cabdff1aSopenharmony_ci        "add  %9, %k0                           \n\t"
109cabdff1aSopenharmony_ci        "shr $2, %k0                            \n\t"
110cabdff1aSopenharmony_ci        : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
111cabdff1aSopenharmony_ci          "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
112cabdff1aSopenharmony_ci        : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
113cabdff1aSopenharmony_ci          "i"(offsetof(CABACContext, bytestream)),
114cabdff1aSopenharmony_ci          "i"(offsetof(CABACContext, bytestream_end))
115cabdff1aSopenharmony_ci          TABLES_ARG
116cabdff1aSopenharmony_ci        : "%"FF_REG_c, "memory"
117cabdff1aSopenharmony_ci    );
118cabdff1aSopenharmony_ci    return coeff_count;
119cabdff1aSopenharmony_ci}
120cabdff1aSopenharmony_ci
/* Make the name decode_significance_8x8 resolve to the x86 asm version below. */
121cabdff1aSopenharmony_ci#define decode_significance_8x8 decode_significance_8x8_x86
/**
 * Variant of the significance scan that picks the context byte of every
 * bin through a per-position lookup table, for positions 0 to 62.
 *
 * For each position the first bin uses the context byte at
 * significant_coeff_ctx_base + sig_off[position]. If bit 0 of the result
 * is set, a second bin is decoded with the context byte at
 * last_coeff_ctx_base + T[position], where T is the byte table located at
 * ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET. The
 * position is then stored through index, and the scan either stops (second
 * bin set) or advances index by 4 bytes (second bin clear). The loop runs
 * while the incremented position is below 63; when it falls through, the
 * position reached (63) is stored through index unconditionally.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC and TABLES_ARG are defined outside
 * this file; the comments below only describe how their operands are
 * wired here, not what the macros expand to.
 *
 * @param c                          CABAC state; c->low and c->range are
 *                                   read and written back by the asm, and
 *                                   the offsets of bytestream and
 *                                   bytestream_end are passed as immediates
 * @param significant_coeff_ctx_base base address of the first-bin contexts
 * @param index                      output array of 32-bit positions
 * @param last_coeff_ctx_base        base address of the second-bin contexts
 * @param sig_off                    byte table indexed by position, giving
 *                                   the first-bin context offset
 * @return number of entries written through index, computed as
 *         (last written slot + 4 - initial index) >> 2
 */
122cabdff1aSopenharmony_cistatic int decode_significance_8x8_x86(CABACContext *c,
123cabdff1aSopenharmony_ci                                       uint8_t *significant_coeff_ctx_base,
124cabdff1aSopenharmony_ci                                       int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
    /* 4 minus the initial index address: turns the address of the last
     * written slot into a byte count that includes that slot. */
125cabdff1aSopenharmony_ci    int minusindex= 4-(intptr_t)index;
126cabdff1aSopenharmony_ci    int bit;
127cabdff1aSopenharmony_ci    x86_reg coeff_count;
    /* Current position; starts at 0 and is advanced inside the asm. */
128cabdff1aSopenharmony_ci    x86_reg last=0;
    /* Scratch: holds either the position or a context byte address. */
129cabdff1aSopenharmony_ci    x86_reg state;
130cabdff1aSopenharmony_ci
131cabdff1aSopenharmony_ci#ifdef BROKEN_RELOCATIONS
132cabdff1aSopenharmony_ci    void *tables;
133cabdff1aSopenharmony_ci
    /* Load the address of ff_h264_cabac_tables into a register with lea.
     * NOTE(review): presumably handed to the main asm through TABLES_ARG
     * (defined elsewhere) as operand %15 -- confirm. */
134cabdff1aSopenharmony_ci    __asm__ volatile(
135cabdff1aSopenharmony_ci        "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
136cabdff1aSopenharmony_ci        : "=&r"(tables)
137cabdff1aSopenharmony_ci        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
138cabdff1aSopenharmony_ci    );
139cabdff1aSopenharmony_ci#endif
140cabdff1aSopenharmony_ci
    /*
     * Operand map for the asm below:
     *   %0  coeff_count (scratch during the loop, result at the end)
     *   %1  last (current position)     %2  index (output cursor)
     *   %3  c->low        %4  bit       %5  c->range
     *   %6  state (scratch)             %7  c
     *   %8  minusindex                  %9  significant_coeff_ctx_base
     *   %10 sig_off                     %11 last_coeff_ctx_base
     *   %12 offsetof(CABACContext, bytestream)
     *   %13 offsetof(CABACContext, bytestream_end)
     *   %14 H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET
     *   %15 whatever TABLES_ARG appends (defined elsewhere)
     * The c register (FF_REG_c) is listed as clobbered.
     */
141cabdff1aSopenharmony_ci    __asm__ volatile(
        /* state = position */
142cabdff1aSopenharmony_ci        "mov %1, %6                             \n\t"
        /* 3: top of the per-position loop */
143cabdff1aSopenharmony_ci        "3:                                     \n\t"
144cabdff1aSopenharmony_ci
        /* state = significant_coeff_ctx_base + sig_off[position]
         * (the table byte is zero-extended). */
145cabdff1aSopenharmony_ci        "mov %10, %0                            \n\t"
146cabdff1aSopenharmony_ci        "movzb (%0, %6), %6                     \n\t"
147cabdff1aSopenharmony_ci        "add %9, %6                             \n\t"
148cabdff1aSopenharmony_ci
        /* First bin of this position, context byte at (%6).
         * NOTE(review): presumably the macro leaves the decoded bin in %4,
         * since bit 0 of %4 is tested right after -- confirm in cabac.h. */
149cabdff1aSopenharmony_ci        BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
150cabdff1aSopenharmony_ci                             "%5", "%q5", "%k0", "%b0",
151cabdff1aSopenharmony_ci                             "%c12(%7)", "%c13(%7)",
152cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
153cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
154cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
155cabdff1aSopenharmony_ci                             "%15")
156cabdff1aSopenharmony_ci
        /* Reload the position into state; if bit 0 of %4 is clear there is
         * nothing to record, go to the loop increment. */
157cabdff1aSopenharmony_ci        "mov %1, %6                             \n\t"
158cabdff1aSopenharmony_ci        "test $1, %4                            \n\t"
159cabdff1aSopenharmony_ci        " jz 4f                                 \n\t"
160cabdff1aSopenharmony_ci
        /* state = byte at ff_h264_cabac_tables + %14 + position; the table
         * base comes from register %15 or from the symbol itself. */
161cabdff1aSopenharmony_ci#ifdef BROKEN_RELOCATIONS
162cabdff1aSopenharmony_ci        "movzb %c14(%15, %q6), %6\n\t"
163cabdff1aSopenharmony_ci#else
164cabdff1aSopenharmony_ci        "movzb "MANGLE(ff_h264_cabac_tables)"+%c14(%6), %6\n\t"
165cabdff1aSopenharmony_ci#endif
        /* state += last_coeff_ctx_base: address of the second-bin context. */
166cabdff1aSopenharmony_ci        "add %11, %6                            \n\t"
167cabdff1aSopenharmony_ci
        /* Second bin of this position, context byte at (%6). */
168cabdff1aSopenharmony_ci        BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
169cabdff1aSopenharmony_ci                             "%5", "%q5", "%k0", "%b0",
170cabdff1aSopenharmony_ci                             "%c12(%7)", "%c13(%7)",
171cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
172cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
173cabdff1aSopenharmony_ci                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
174cabdff1aSopenharmony_ci                             "%15")
175cabdff1aSopenharmony_ci
        /* Store the position (low 32 bits) at the current index slot. */
176cabdff1aSopenharmony_ci        "mov %2, %0                             \n\t"
177cabdff1aSopenharmony_ci        "mov %1, %6                             \n\t"
178cabdff1aSopenharmony_ci        "mov %k6, (%0)                          \n\t"
179cabdff1aSopenharmony_ci
        /* Second bin set: stop scanning; %0 still holds the slot just
         * written, which label 5 uses to compute the count. */
180cabdff1aSopenharmony_ci        "test $1, %4                            \n\t"
181cabdff1aSopenharmony_ci        " jnz 5f                                \n\t"
182cabdff1aSopenharmony_ci
        /* Otherwise advance the index cursor by 4 bytes. */
183cabdff1aSopenharmony_ci        "add"FF_OPSIZE"  $4, %2                 \n\t"
184cabdff1aSopenharmony_ci
        /* 4: position++, written back to last; loop while below 63. */
185cabdff1aSopenharmony_ci        "4:                                     \n\t"
186cabdff1aSopenharmony_ci        "add $1, %6                             \n\t"
187cabdff1aSopenharmony_ci        "mov %6, %1                             \n\t"
188cabdff1aSopenharmony_ci        "cmp $63, %6                            \n\t"
189cabdff1aSopenharmony_ci        " jb 3b                                 \n\t"
        /* Loop ran out: store the position reached (63) without decoding
         * any further bin. */
190cabdff1aSopenharmony_ci        "mov %2, %0                             \n\t"
191cabdff1aSopenharmony_ci        "mov %k6, (%0)                          \n\t"
        /* 5: count = (address of last written slot + minusindex) >> 2,
         * computed on the low 32 bits of %0. */
192cabdff1aSopenharmony_ci        "5:                                     \n\t"
193cabdff1aSopenharmony_ci        "addl %8, %k0                           \n\t"
194cabdff1aSopenharmony_ci        "shr $2, %k0                            \n\t"
195cabdff1aSopenharmony_ci        : "=&q"(coeff_count), "+"REG64(last), "+"REG64(index), "+&r"(c->low),
196cabdff1aSopenharmony_ci          "=&r"(bit), "+&r"(c->range), "=&r"(state)
197cabdff1aSopenharmony_ci        : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
198cabdff1aSopenharmony_ci          REG64(sig_off), REG64(last_coeff_ctx_base),
199cabdff1aSopenharmony_ci          "i"(offsetof(CABACContext, bytestream)),
200cabdff1aSopenharmony_ci          "i"(offsetof(CABACContext, bytestream_end)),
201cabdff1aSopenharmony_ci          "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
202cabdff1aSopenharmony_ci        : "%"FF_REG_c, "memory"
203cabdff1aSopenharmony_ci    );
204cabdff1aSopenharmony_ci    return coeff_count;
205cabdff1aSopenharmony_ci}
206cabdff1aSopenharmony_ci#endif /* HAVE_7REGS && !BROKEN_COMPILER */
207cabdff1aSopenharmony_ci
208cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */
209