1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 3cabdff1aSopenharmony_ci * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci/** 23cabdff1aSopenharmony_ci * @file 24cabdff1aSopenharmony_ci * H.264 / AVC / MPEG-4 part10 codec. 25cabdff1aSopenharmony_ci * non-SIMD x86-specific optimizations for H.264 26cabdff1aSopenharmony_ci * @author Michael Niedermayer <michaelni@gmx.at> 27cabdff1aSopenharmony_ci */ 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ci#include <stddef.h> 30cabdff1aSopenharmony_ci 31cabdff1aSopenharmony_ci#include "libavcodec/cabac.h" 32cabdff1aSopenharmony_ci#include "cabac.h" 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_ci#if HAVE_INLINE_ASM 35cabdff1aSopenharmony_ci 36cabdff1aSopenharmony_ci#if ARCH_X86_64 37cabdff1aSopenharmony_ci#define REG64 "r" 38cabdff1aSopenharmony_ci#else 39cabdff1aSopenharmony_ci#define REG64 "m" 40cabdff1aSopenharmony_ci#endif 41cabdff1aSopenharmony_ci 42cabdff1aSopenharmony_ci//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet 43cabdff1aSopenharmony_ci//as that would make optimization work hard) 44cabdff1aSopenharmony_ci#if HAVE_7REGS && !BROKEN_COMPILER 45cabdff1aSopenharmony_ci#define decode_significance decode_significance_x86 46cabdff1aSopenharmony_cistatic int decode_significance_x86(CABACContext *c, int max_coeff, 47cabdff1aSopenharmony_ci uint8_t *significant_coeff_ctx_base, 48cabdff1aSopenharmony_ci int *index, x86_reg last_off){ 49cabdff1aSopenharmony_ci void *end= significant_coeff_ctx_base + max_coeff - 1; 50cabdff1aSopenharmony_ci int minusstart= -(intptr_t)significant_coeff_ctx_base; 51cabdff1aSopenharmony_ci int minusindex= 4-(intptr_t)index; 52cabdff1aSopenharmony_ci int bit; 53cabdff1aSopenharmony_ci x86_reg coeff_count; 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_ci#ifdef BROKEN_RELOCATIONS 56cabdff1aSopenharmony_ci void *tables; 57cabdff1aSopenharmony_ci 58cabdff1aSopenharmony_ci __asm__ volatile( 59cabdff1aSopenharmony_ci "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" 60cabdff1aSopenharmony_ci : "=&r"(tables) 61cabdff1aSopenharmony_ci : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables) 62cabdff1aSopenharmony_ci ); 63cabdff1aSopenharmony_ci#endif 64cabdff1aSopenharmony_ci 65cabdff1aSopenharmony_ci __asm__ volatile( 66cabdff1aSopenharmony_ci "3: \n\t" 67cabdff1aSopenharmony_ci 68cabdff1aSopenharmony_ci BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3", 69cabdff1aSopenharmony_ci "%5", "%q5", "%k0", "%b0", 70cabdff1aSopenharmony_ci "%c11(%6)", "%c12(%6)", 71cabdff1aSopenharmony_ci AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), 72cabdff1aSopenharmony_ci AV_STRINGIFY(H264_LPS_RANGE_OFFSET), 73cabdff1aSopenharmony_ci AV_STRINGIFY(H264_MLPS_STATE_OFFSET), 74cabdff1aSopenharmony_ci "%13") 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ci "test $1, %4 \n\t" 77cabdff1aSopenharmony_ci " jz 4f \n\t" 78cabdff1aSopenharmony_ci "add %10, %1 \n\t" 79cabdff1aSopenharmony_ci 80cabdff1aSopenharmony_ci BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3", 81cabdff1aSopenharmony_ci "%5", "%q5", "%k0", "%b0", 82cabdff1aSopenharmony_ci "%c11(%6)", "%c12(%6)", 83cabdff1aSopenharmony_ci AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), 84cabdff1aSopenharmony_ci AV_STRINGIFY(H264_LPS_RANGE_OFFSET), 85cabdff1aSopenharmony_ci AV_STRINGIFY(H264_MLPS_STATE_OFFSET), 86cabdff1aSopenharmony_ci "%13") 87cabdff1aSopenharmony_ci 88cabdff1aSopenharmony_ci "sub %10, %1 \n\t" 89cabdff1aSopenharmony_ci "mov %2, %0 \n\t" 90cabdff1aSopenharmony_ci "movl %7, %%ecx \n\t" 91cabdff1aSopenharmony_ci "add %1, %%"FF_REG_c" \n\t" 92cabdff1aSopenharmony_ci "movl %%ecx, (%0) \n\t" 93cabdff1aSopenharmony_ci 94cabdff1aSopenharmony_ci "test $1, %4 \n\t" 95cabdff1aSopenharmony_ci " jnz 5f \n\t" 96cabdff1aSopenharmony_ci 97cabdff1aSopenharmony_ci "add"FF_OPSIZE" $4, %2 \n\t" 98cabdff1aSopenharmony_ci 99cabdff1aSopenharmony_ci "4: \n\t" 100cabdff1aSopenharmony_ci "add $1, %1 \n\t" 101cabdff1aSopenharmony_ci "cmp %8, %1 \n\t" 102cabdff1aSopenharmony_ci " jb 3b \n\t" 103cabdff1aSopenharmony_ci "mov %2, %0 \n\t" 104cabdff1aSopenharmony_ci "movl %7, %%ecx \n\t" 105cabdff1aSopenharmony_ci "add %1, %%"FF_REG_c" \n\t" 106cabdff1aSopenharmony_ci "movl %%ecx, (%0) \n\t" 107cabdff1aSopenharmony_ci "5: \n\t" 108cabdff1aSopenharmony_ci "add %9, %k0 \n\t" 109cabdff1aSopenharmony_ci "shr $2, %k0 \n\t" 110cabdff1aSopenharmony_ci : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index), 111cabdff1aSopenharmony_ci "+&r"(c->low), "=&r"(bit), "+&r"(c->range) 112cabdff1aSopenharmony_ci : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), 113cabdff1aSopenharmony_ci "i"(offsetof(CABACContext, bytestream)), 114cabdff1aSopenharmony_ci "i"(offsetof(CABACContext, bytestream_end)) 115cabdff1aSopenharmony_ci TABLES_ARG 116cabdff1aSopenharmony_ci : "%"FF_REG_c, "memory" 117cabdff1aSopenharmony_ci ); 118cabdff1aSopenharmony_ci return coeff_count; 119cabdff1aSopenharmony_ci} 120cabdff1aSopenharmony_ci 121cabdff1aSopenharmony_ci#define decode_significance_8x8 decode_significance_8x8_x86 122cabdff1aSopenharmony_cistatic int decode_significance_8x8_x86(CABACContext *c, 123cabdff1aSopenharmony_ci uint8_t *significant_coeff_ctx_base, 124cabdff1aSopenharmony_ci int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){ 125cabdff1aSopenharmony_ci int minusindex= 4-(intptr_t)index; 126cabdff1aSopenharmony_ci int bit; 127cabdff1aSopenharmony_ci x86_reg coeff_count; 128cabdff1aSopenharmony_ci x86_reg last=0; 129cabdff1aSopenharmony_ci x86_reg state; 130cabdff1aSopenharmony_ci 131cabdff1aSopenharmony_ci#ifdef BROKEN_RELOCATIONS 132cabdff1aSopenharmony_ci void *tables; 133cabdff1aSopenharmony_ci 134cabdff1aSopenharmony_ci __asm__ volatile( 135cabdff1aSopenharmony_ci "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" 136cabdff1aSopenharmony_ci : "=&r"(tables) 137cabdff1aSopenharmony_ci : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables) 138cabdff1aSopenharmony_ci ); 139cabdff1aSopenharmony_ci#endif 140cabdff1aSopenharmony_ci 141cabdff1aSopenharmony_ci __asm__ volatile( 142cabdff1aSopenharmony_ci "mov %1, %6 \n\t" 143cabdff1aSopenharmony_ci "3: \n\t" 144cabdff1aSopenharmony_ci 145cabdff1aSopenharmony_ci "mov %10, %0 \n\t" 146cabdff1aSopenharmony_ci "movzb (%0, %6), %6 \n\t" 147cabdff1aSopenharmony_ci "add %9, %6 \n\t" 148cabdff1aSopenharmony_ci 149cabdff1aSopenharmony_ci BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3", 150cabdff1aSopenharmony_ci "%5", "%q5", "%k0", "%b0", 151cabdff1aSopenharmony_ci "%c12(%7)", "%c13(%7)", 152cabdff1aSopenharmony_ci AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), 153cabdff1aSopenharmony_ci AV_STRINGIFY(H264_LPS_RANGE_OFFSET), 154cabdff1aSopenharmony_ci AV_STRINGIFY(H264_MLPS_STATE_OFFSET), 155cabdff1aSopenharmony_ci "%15") 156cabdff1aSopenharmony_ci 157cabdff1aSopenharmony_ci "mov %1, %6 \n\t" 158cabdff1aSopenharmony_ci "test $1, %4 \n\t" 159cabdff1aSopenharmony_ci " jz 4f \n\t" 160cabdff1aSopenharmony_ci 161cabdff1aSopenharmony_ci#ifdef BROKEN_RELOCATIONS 162cabdff1aSopenharmony_ci "movzb %c14(%15, %q6), %6\n\t" 163cabdff1aSopenharmony_ci#else 164cabdff1aSopenharmony_ci "movzb "MANGLE(ff_h264_cabac_tables)"+%c14(%6), %6\n\t" 165cabdff1aSopenharmony_ci#endif 166cabdff1aSopenharmony_ci "add %11, %6 \n\t" 167cabdff1aSopenharmony_ci 168cabdff1aSopenharmony_ci BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3", 169cabdff1aSopenharmony_ci "%5", "%q5", "%k0", "%b0", 170cabdff1aSopenharmony_ci "%c12(%7)", "%c13(%7)", 171cabdff1aSopenharmony_ci AV_STRINGIFY(H264_NORM_SHIFT_OFFSET), 172cabdff1aSopenharmony_ci AV_STRINGIFY(H264_LPS_RANGE_OFFSET), 173cabdff1aSopenharmony_ci AV_STRINGIFY(H264_MLPS_STATE_OFFSET), 174cabdff1aSopenharmony_ci "%15") 175cabdff1aSopenharmony_ci 176cabdff1aSopenharmony_ci "mov %2, %0 \n\t" 177cabdff1aSopenharmony_ci "mov %1, %6 \n\t" 178cabdff1aSopenharmony_ci "mov %k6, (%0) \n\t" 179cabdff1aSopenharmony_ci 180cabdff1aSopenharmony_ci "test $1, %4 \n\t" 181cabdff1aSopenharmony_ci " jnz 5f \n\t" 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci "add"FF_OPSIZE" $4, %2 \n\t" 184cabdff1aSopenharmony_ci 185cabdff1aSopenharmony_ci "4: \n\t" 186cabdff1aSopenharmony_ci "add $1, %6 \n\t" 187cabdff1aSopenharmony_ci "mov %6, %1 \n\t" 188cabdff1aSopenharmony_ci "cmp $63, %6 \n\t" 189cabdff1aSopenharmony_ci " jb 3b \n\t" 190cabdff1aSopenharmony_ci "mov %2, %0 \n\t" 191cabdff1aSopenharmony_ci "mov %k6, (%0) \n\t" 192cabdff1aSopenharmony_ci "5: \n\t" 193cabdff1aSopenharmony_ci "addl %8, %k0 \n\t" 194cabdff1aSopenharmony_ci "shr $2, %k0 \n\t" 195cabdff1aSopenharmony_ci : "=&q"(coeff_count), "+"REG64(last), "+"REG64(index), "+&r"(c->low), 196cabdff1aSopenharmony_ci "=&r"(bit), "+&r"(c->range), "=&r"(state) 197cabdff1aSopenharmony_ci : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), 198cabdff1aSopenharmony_ci REG64(sig_off), REG64(last_coeff_ctx_base), 199cabdff1aSopenharmony_ci "i"(offsetof(CABACContext, bytestream)), 200cabdff1aSopenharmony_ci "i"(offsetof(CABACContext, bytestream_end)), 201cabdff1aSopenharmony_ci "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG 202cabdff1aSopenharmony_ci : "%"FF_REG_c, "memory" 203cabdff1aSopenharmony_ci ); 204cabdff1aSopenharmony_ci return coeff_count; 205cabdff1aSopenharmony_ci} 206cabdff1aSopenharmony_ci#endif /* HAVE_7REGS && BROKEN_COMPILER */ 207cabdff1aSopenharmony_ci 208cabdff1aSopenharmony_ci#endif /* HAVE_INLINE_ASM */ 209