/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG-4 part10 codec.
 * non-SIMD x86-specific optimizations for H.264
 *
 * Provides inline-assembly replacements for decode_significance and
 * decode_significance_8x8 (installed through the #defines below).
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include <stddef.h>

#include "libavcodec/cabac.h"
/* NOTE(review): presumably the x86-specific cabac.h that supplies
 * BRANCHLESS_GET_CABAC, TABLES_ARG and friends -- confirm. */
#include "cabac.h"

#if HAVE_INLINE_ASM

/*
 * Constraint string used for the "last", "index", "sig_off" and
 * "last_coeff_ctx_base" operands of decode_significance_8x8_x86:
 * a register ("r") on x86-64, a memory operand ("m") otherwise.
 */
#if ARCH_X86_64
#define REG64 "r"
#else
#define REG64 "m"
#endif

//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
//as that would make optimization work hard)
#if HAVE_7REGS && !BROKEN_COMPILER
#define decode_significance decode_significance_x86
/**
 * Inline-asm implementation of decode_significance.
 *
 * Walks the context bytes starting at significant_coeff_ctx_base. For each
 * position one bin is decoded with BRANCHLESS_GET_CABAC using the context
 * byte at the current pointer; when bit 0 of the result is set, a second bin
 * is decoded using the context byte last_off bytes further on, and the
 * position (current pointer minus the initial significant_coeff_ctx_base) is
 * stored through the index pointer. If bit 0 of the second result is set the
 * walk stops; otherwise the index pointer advances by 4 bytes. If the walk
 * reaches significant_coeff_ctx_base + max_coeff - 1, that final position is
 * stored without decoding any bin for it.
 *
 * NOTE(review): judging by the parameter names the two bins are the H.264
 * significant_coeff_flag / last_significant_coeff_flag pair -- confirm.
 *
 * @param c                          CABAC context; c->low and c->range are
 *                                   updated, and the offsets of bytestream and
 *                                   bytestream_end are passed to the macro
 * @param max_coeff                  number of positions; the last one is
 *                                   never decoded explicitly
 * @param significant_coeff_ctx_base first context byte for the first bin
 * @param index                      output list of positions
 * @param last_off                   byte offset from the first-bin context to
 *                                   the second-bin context
 * @return number of entries written to index
 */
static int decode_significance_x86(CABACContext *c, int max_coeff,
                                   uint8_t *significant_coeff_ctx_base,
                                   int *index, x86_reg last_off){
    /* Loop bound: the pointer for the final position (max_coeff - 1). */
    void *end= significant_coeff_ctx_base + max_coeff - 1;
    /* Negated start address (truncated to int); adding the current context
     * pointer to it yields the current position. */
    int minusstart= -(intptr_t)significant_coeff_ctx_base;
    /* 4 minus the start of the index list (truncated to int); adding the
     * address of the last written entry yields the number of bytes written. */
    int minusindex= 4-(intptr_t)index;
    int bit;
    x86_reg coeff_count;

#ifdef BROKEN_RELOCATIONS
    void *tables;

    /* Load the address of ff_h264_cabac_tables into a register.
     * NOTE(review): it is presumably handed to the main asm statement via
     * TABLES_ARG as operand %13 -- TABLES_ARG is defined elsewhere, confirm. */
    __asm__ volatile(
        "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
        : "=&r"(tables)
        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
    );
#endif

    /*
     * Operand map:
     *   %0 coeff_count (also used as a pointer temporary and passed to the
     *      macro as %k0/%b0)    %1 significant_coeff_ctx_base (running pointer)
     *   %2 index (in memory)    %3 c->low     %4 bit     %5 c->range
     *   %6 c    %7 minusstart   %8 end        %9 minusindex   %10 last_off
     *   %11 offsetof(bytestream)   %12 offsetof(bytestream_end)
     *   %13 whatever TABLES_ARG appends
     */
    __asm__ volatile(
        /* 3: top of the per-position loop */
        "3: \n\t"

        /* First bin, context byte at (%1); result lands in %4.
         * NOTE(review): macro semantics are defined outside this file. */
        BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
                             "%5", "%q5", "%k0", "%b0",
                             "%c11(%6)", "%c12(%6)",
                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
                             "%13")

        /* Bit 0 clear: nothing to record, go to the next position. */
        "test $1, %4 \n\t"
        " jz 4f \n\t"
        /* Temporarily point %1 at the second-bin context (base + last_off). */
        "add %10, %1 \n\t"

        BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
                             "%5", "%q5", "%k0", "%b0",
                             "%c11(%6)", "%c12(%6)",
                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
                             "%13")

        /* Restore %1, then store the position: *index = %1 + minusstart. */
        "sub %10, %1 \n\t"
        "mov %2, %0 \n\t"
        "movl %7, %%ecx \n\t"
        "add %1, %%"FF_REG_c" \n\t"
        "movl %%ecx, (%0) \n\t"

        /* Bit 0 of the second bin set: stop, %0 points at the last entry. */
        "test $1, %4 \n\t"
        " jnz 5f \n\t"

        /* Advance the index pointer by one 4-byte entry. */
        "add"FF_OPSIZE" $4, %2 \n\t"

        /* 4: next position; loop while the pointer is below end. */
        "4: \n\t"
        "add $1, %1 \n\t"
        "cmp %8, %1 \n\t"
        " jb 3b \n\t"
        /* Loop ran to the end: record the final position without decoding. */
        "mov %2, %0 \n\t"
        "movl %7, %%ecx \n\t"
        "add %1, %%"FF_REG_c" \n\t"
        "movl %%ecx, (%0) \n\t"
        /* 5: count = (address of last entry + 4 - start of list) >> 2,
         * computed on the low 32 bits only. */
        "5: \n\t"
        "add %9, %k0 \n\t"
        "shr $2, %k0 \n\t"
        : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
          "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
        : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
          "i"(offsetof(CABACContext, bytestream)),
          "i"(offsetof(CABACContext, bytestream_end))
          TABLES_ARG
        : "%"FF_REG_c, "memory"
    );
    return coeff_count;
}

#define decode_significance_8x8 decode_significance_8x8_x86
/**
 * Inline-asm implementation of decode_significance_8x8.
 *
 * Iterates a position counter ("last") from 0 while it is below 63. For each
 * position a bin is decoded with the context byte at
 * significant_coeff_ctx_base + sig_off[position]; when bit 0 of the result is
 * set, a second bin is decoded with the context byte at last_coeff_ctx_base
 * plus a byte looked up in ff_h264_cabac_tables at
 * H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET + position, and the position is
 * stored through the index pointer. If bit 0 of the second result is set the
 * loop stops; otherwise the index pointer advances by 4 bytes. If the counter
 * reaches 63, the value 63 is stored without decoding any bin for it.
 *
 * NOTE(review): judging by the parameter names the two bins are the H.264
 * significant_coeff_flag / last_significant_coeff_flag pair -- confirm.
 *
 * @param c                          CABAC context; c->low and c->range are
 *                                   updated
 * @param significant_coeff_ctx_base base of the first-bin context bytes
 * @param index                      output list of positions
 * @param last_coeff_ctx_base        base of the second-bin context bytes
 * @param sig_off                    per-position byte offsets into the
 *                                   first-bin contexts
 * @return number of entries written to index
 */
static int decode_significance_8x8_x86(CABACContext *c,
                                       uint8_t *significant_coeff_ctx_base,
                                       int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
    /* 4 minus the start of the index list (truncated to int); adding the
     * address of the last written entry yields the number of bytes written. */
    int minusindex= 4-(intptr_t)index;
    int bit;
    x86_reg coeff_count;
    /* Current position counter, kept in operand %1 across iterations. */
    x86_reg last=0;
    /* Scratch register: holds the position, a table byte, or a context
     * address at different points of the loop. */
    x86_reg state;

#ifdef BROKEN_RELOCATIONS
    void *tables;

    /* Load the address of ff_h264_cabac_tables into a register.
     * NOTE(review): it is presumably handed to the main asm statement via
     * TABLES_ARG as operand %15 -- TABLES_ARG is defined elsewhere, confirm. */
    __asm__ volatile(
        "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
        : "=&r"(tables)
        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
    );
#endif

    /*
     * Operand map:
     *   %0 coeff_count (also used as a pointer temporary and passed to the
     *      macro as %k0/%b0)    %1 last       %2 index
     *   %3 c->low    %4 bit     %5 c->range   %6 state    %7 c
     *   %8 minusindex           %9 significant_coeff_ctx_base
     *   %10 sig_off             %11 last_coeff_ctx_base
     *   %12 offsetof(bytestream)   %13 offsetof(bytestream_end)
     *   %14 H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET
     *   %15 whatever TABLES_ARG appends
     */
    __asm__ volatile(
        "mov %1, %6 \n\t"
        /* 3: top of the per-position loop; %6 holds the position here. */
        "3: \n\t"

        /* %6 = significant_coeff_ctx_base + sig_off[position] */
        "mov %10, %0 \n\t"
        "movzb (%0, %6), %6 \n\t"
        "add %9, %6 \n\t"

        /* First bin, context byte at (%6); result lands in %4.
         * NOTE(review): macro semantics are defined outside this file. */
        BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
                             "%5", "%q5", "%k0", "%b0",
                             "%c12(%7)", "%c13(%7)",
                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
                             "%15")

        /* Reload the position; bit 0 clear means nothing to record. */
        "mov %1, %6 \n\t"
        "test $1, %4 \n\t"
        " jz 4f \n\t"

        /* %6 = byte from ff_h264_cabac_tables at offset %14 + position */
#ifdef BROKEN_RELOCATIONS
        "movzb %c14(%15, %q6), %6\n\t"
#else
        "movzb "MANGLE(ff_h264_cabac_tables)"+%c14(%6), %6\n\t"
#endif
        /* ... turned into a context address by adding last_coeff_ctx_base. */
        "add %11, %6 \n\t"

        BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
                             "%5", "%q5", "%k0", "%b0",
                             "%c12(%7)", "%c13(%7)",
                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
                             "%15")

        /* Store the position: *index = last (low 32 bits). */
        "mov %2, %0 \n\t"
        "mov %1, %6 \n\t"
        "mov %k6, (%0) \n\t"

        /* Bit 0 of the second bin set: stop, %0 points at the last entry. */
        "test $1, %4 \n\t"
        " jnz 5f \n\t"

        /* Advance the index pointer by one 4-byte entry. */
        "add"FF_OPSIZE" $4, %2 \n\t"

        /* 4: increment the position, write it back, loop while below 63. */
        "4: \n\t"
        "add $1, %6 \n\t"
        "mov %6, %1 \n\t"
        "cmp $63, %6 \n\t"
        " jb 3b \n\t"
        /* Counter reached 63: record it without decoding. */
        "mov %2, %0 \n\t"
        "mov %k6, (%0) \n\t"
        /* 5: count = (address of last entry + 4 - start of list) >> 2,
         * computed on the low 32 bits only. */
        "5: \n\t"
        "addl %8, %k0 \n\t"
        "shr $2, %k0 \n\t"
        : "=&q"(coeff_count), "+"REG64(last), "+"REG64(index), "+&r"(c->low),
          "=&r"(bit), "+&r"(c->range), "=&r"(state)
        : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
          REG64(sig_off), REG64(last_coeff_ctx_base),
          "i"(offsetof(CABACContext, bytestream)),
          "i"(offsetof(CABACContext, bytestream_end)),
          "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
        : "%"FF_REG_c, "memory"
    );
    return coeff_count;
}
#endif /* HAVE_7REGS && !BROKEN_COMPILER */

#endif /* HAVE_INLINE_ASM */