/*
 * Loongson optimized cabac
 *
 * Copyright (c) 2020 Loongson Technology Corporation Limited
 * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
 *                Gu Xiwei(guxiwei-hf@loongson.cn)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_LOONGARCH_CABAC_H
#define AVCODEC_LOONGARCH_CABAC_H

#include "libavcodec/cabac.h"
#include "config.h"

/*
 * Assembly template that decodes one CABAC symbol using the state byte at
 * *state.  get_cabac_inline_loongarch() selects this variant when
 * UNCHECKED_BITSTREAM_READER is non-zero: on refill it advances c_bytestream
 * by 2 unconditionally, without comparing against an end pointer.
 *
 * The template only supplies instructions; the enclosing asm statement must
 * provide these named operands:
 *   write-only scratch/result : bit, tmp0, tmp1, tmp2
 *   read-write                : c_range, c_low, c_bytestream
 *   register inputs           : state, tables, cabac_mask
 *   immediate inputs          : lps_off, mlps_off, norm_off
 *
 * On exit, bit holds the decoded value masked to 0 or 1, the byte at *state
 * has been overwritten with the value looked up at tables + bit + mlps_off,
 * and c_range / c_low have been shifted left by the count looked up at
 * tables + c_range + norm_off.
 *
 * Both local labels are "1:"; each "1f" branches to the next one below it.
 * The definition ends with a line continuation followed by an empty line,
 * which must be kept.
 */
#define GET_CABAC_LOONGARCH_UNCBSR \
    /* bit = *state (zero-extended byte) */ \
    "ld.bu %[bit], %[state], 0x0 \n\t" \
    /* tmp0 = tables + ((c_range & 0xC0) << 1) + bit */ \
    "andi %[tmp0], %[c_range], 0xC0 \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "add.d %[tmp0], %[tmp0], %[tables] \n\t" \
    "add.d %[tmp0], %[tmp0], %[bit] \n\t" \
    /* tmp1: RangeLPS, the byte at tmp0 + lps_off */ \
    "ld.bu %[tmp1], %[tmp0], %[lps_off] \n\t" \
    \
    /* c_range -= RangeLPS; tmp0 = c_range << 17 */ \
    "sub.d %[c_range], %[c_range], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[c_range], 0x11 \n\t" \
    /* if tmp0 >= c_low (signed), skip the three instructions below */ \
    "bge %[tmp0], %[c_low], 1f \n\t" \
    /* otherwise: c_range = RangeLPS, bit = ~bit, c_low -= tmp0 */ \
    "move %[c_range], %[tmp1] \n\t" \
    "nor %[bit], %[bit], %[bit] \n\t" \
    "sub.d %[c_low], %[c_low], %[tmp0] \n\t" \
    \
    "1: \n\t" \
    /* tmp1: *state, the new state byte read at tables + bit + mlps_off */ \
    "add.d %[tmp0], %[tables], %[bit] \n\t" \
    "ld.bu %[tmp1], %[tmp0], %[mlps_off] \n\t" \
    /* tmp2: lps_mask, the byte at tables + c_range + norm_off; it is used */ \
    /* below as the left-shift count for c_range and c_low */ \
    "add.d %[tmp0], %[tables], %[c_range] \n\t" \
    "ld.bu %[tmp2], %[tmp0], %[norm_off] \n\t" \
    \
    /* keep only bit 0 of the result, then store the new state byte */ \
    "andi %[bit], %[bit], 0x01 \n\t" \
    "st.b %[tmp1], %[state], 0x0 \n\t" \
    "sll.d %[c_range], %[c_range], %[tmp2] \n\t" \
    "sll.d %[c_low], %[c_low], %[tmp2] \n\t" \
    \
    /* refill only when (c_low & cabac_mask) == 0 */ \
    "and %[tmp1], %[c_low], %[cabac_mask] \n\t" \
    "bnez %[tmp1], 1f \n\t" \
    /* tmp1 = 16 bits read from c_bytestream */ \
    "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t" \
    /* tmp2 = (number of trailing zero bits in c_low) - 16 */ \
    "ctz.d %[tmp0], %[c_low] \n\t" \
    "addi.d %[tmp2], %[tmp0], -16 \n\t" \
    /* tmp0 = ((byte-swapped tmp1 << 1) - cabac_mask) << tmp2 */ \
    "revb.2h %[tmp0], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "sub.d %[tmp0], %[tmp0], %[cabac_mask] \n\t" \
    "sll.d %[tmp0], %[tmp0], %[tmp2] \n\t" \
    "add.d %[c_low], %[c_low], %[tmp0] \n\t" \
    /* consume the two bytes just read, with no end-of-buffer check */ \
    "addi.d %[c_bytestream], %[c_bytestream], 0x02 \n\t" \
    "1: \n\t" \

/*
 * Assembly template that decodes one CABAC symbol using the state byte at
 * *state.  get_cabac_inline_loongarch() selects this variant when
 * UNCHECKED_BITSTREAM_READER is zero.
 *
 * The instruction sequence is the same as GET_CABAC_LOONGARCH_UNCBSR except
 * for the last step of the refill: c_bytestream is advanced by 2 only when
 * it compares below c_bytestream_end, so this variant needs one additional
 * register input operand named c_bytestream_end.  The 16-bit load from
 * c_bytestream is still performed before that comparison.
 *
 * Required named operands:
 *   write-only scratch/result : bit, tmp0, tmp1, tmp2
 *   read-write                : c_range, c_low, c_bytestream
 *   register inputs           : state, tables, cabac_mask, c_bytestream_end
 *   immediate inputs          : lps_off, mlps_off, norm_off
 *
 * The definition ends with a line continuation followed by an empty line,
 * which must be kept.
 */
#define GET_CABAC_LOONGARCH \
    /* bit = *state (zero-extended byte) */ \
    "ld.bu %[bit], %[state], 0x0 \n\t" \
    /* tmp0 = tables + ((c_range & 0xC0) << 1) + bit */ \
    "andi %[tmp0], %[c_range], 0xC0 \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "add.d %[tmp0], %[tmp0], %[tables] \n\t" \
    "add.d %[tmp0], %[tmp0], %[bit] \n\t" \
    /* tmp1: RangeLPS, the byte at tmp0 + lps_off */ \
    "ld.bu %[tmp1], %[tmp0], %[lps_off] \n\t" \
    \
    /* c_range -= RangeLPS; tmp0 = c_range << 17 */ \
    "sub.d %[c_range], %[c_range], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[c_range], 0x11 \n\t" \
    /* if tmp0 >= c_low (signed), skip the three instructions below */ \
    "bge %[tmp0], %[c_low], 1f \n\t" \
    /* otherwise: c_range = RangeLPS, bit = ~bit, c_low -= tmp0 */ \
    "move %[c_range], %[tmp1] \n\t" \
    "nor %[bit], %[bit], %[bit] \n\t" \
    "sub.d %[c_low], %[c_low], %[tmp0] \n\t" \
    \
    "1: \n\t" \
    /* tmp1: *state, the new state byte read at tables + bit + mlps_off */ \
    "add.d %[tmp0], %[tables], %[bit] \n\t" \
    "ld.bu %[tmp1], %[tmp0], %[mlps_off] \n\t" \
    /* tmp2: lps_mask, the byte at tables + c_range + norm_off; it is used */ \
    /* below as the left-shift count for c_range and c_low */ \
    "add.d %[tmp0], %[tables], %[c_range] \n\t" \
    "ld.bu %[tmp2], %[tmp0], %[norm_off] \n\t" \
    \
    /* keep only bit 0 of the result, then store the new state byte */ \
    "andi %[bit], %[bit], 0x01 \n\t" \
    "st.b %[tmp1], %[state], 0x0 \n\t" \
    "sll.d %[c_range], %[c_range], %[tmp2] \n\t" \
    "sll.d %[c_low], %[c_low], %[tmp2] \n\t" \
    \
    /* refill only when (c_low & cabac_mask) == 0 */ \
    "and %[tmp1], %[c_low], %[cabac_mask] \n\t" \
    "bnez %[tmp1], 1f \n\t" \
    /* tmp1 = 16 bits read from c_bytestream */ \
    "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t" \
    /* tmp2 = (number of trailing zero bits in c_low) - 16 */ \
    "ctz.d %[tmp0], %[c_low] \n\t" \
    "addi.d %[tmp2], %[tmp0], -16 \n\t" \
    /* tmp0 = ((byte-swapped tmp1 << 1) - cabac_mask) << tmp2 */ \
    "revb.2h %[tmp0], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "sub.d %[tmp0], %[tmp0], %[cabac_mask] \n\t" \
    "sll.d %[tmp0], %[tmp0], %[tmp2] \n\t" \
    \
    "add.d %[c_low], %[c_low], %[tmp0] \n\t" \
    \
    /* tmp0 = (c_bytestream < c_bytestream_end) ? 1 : 0, signed compare; */ \
    /* adding it twice advances c_bytestream by 2 or leaves it unchanged */ \
    "slt %[tmp0], %[c_bytestream], %[c_bytestream_end] \n\t" \
    "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t" \
    "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t" \
    "1: \n\t" \

/*
 * Decode one symbol with the state byte at *state and update that byte.
 *
 * The get_cabac_inline macro aliases the name to this implementation;
 * presumably the generic CABAC code tests for that macro before defining its
 * own version (not visible in this file -- confirm there).
 *
 * @param c     decoder context; c->range, c->low and c->bytestream are
 *              updated in place, and c->bytestream_end is read when
 *              UNCHECKED_BITSTREAM_READER is zero
 * @param state pointer to the state byte, read and then overwritten
 * @return the decoded value, 0 or 1
 */
#define get_cabac_inline get_cabac_inline_loongarch
static av_always_inline
int get_cabac_inline_loongarch(CABACContext *c, uint8_t * const state)
{
    /* 64-bit scratch registers; bit receives the result of the template */
    int64_t tmp0, tmp1, tmp2, bit;

    __asm__ volatile (
#if UNCHECKED_BITSTREAM_READER
        GET_CABAC_LOONGARCH_UNCBSR
#else
        GET_CABAC_LOONGARCH
#endif
    /* every output is early-clobber ("&"): the template writes them before
     * it has finished reading its inputs */
    : [bit]"=&r"(bit), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
      [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
      [c_bytestream]"+&r"(c->bytestream)
    : [state]"r"(state), [tables]"r"(ff_h264_cabac_tables),
#if !UNCHECKED_BITSTREAM_READER
      [c_bytestream_end]"r"(c->bytestream_end),
#endif
      /* byte offsets into ff_h264_cabac_tables, passed as immediates */
      [lps_off]"i"(H264_LPS_RANGE_OFFSET),
      /* the index added to this offset may be the bitwise NOT of the state
       * (see the "nor" in the template); presumably the +128 bias is what
       * keeps such indices inside the table -- confirm against the layout */
      [mlps_off]"i"(H264_MLPS_STATE_OFFSET + 128),
      [norm_off]"i"(H264_NORM_SHIFT_OFFSET),
      [cabac_mask]"r"(CABAC_MASK)
    /* the template stores to *state and loads from the tables/bytestream */
    : "memory"
    );

    return bit;
}

/*
 * Decode one bypass symbol (no state byte is involved).
 *
 * c->low is doubled, refilled from c->bytestream when
 * (c->low & CABAC_MASK) == 0, and then compared with c->range << 17.
 *
 * @param c decoder context; c->low and c->bytestream are updated in place,
 *          c->range is only read by the instructions, and
 *          c->bytestream_end is read when UNCHECKED_BITSTREAM_READER is zero
 * @return 0 if the refilled c->low is below c->range << 17; otherwise 1,
 *         and c->range << 17 is subtracted from c->low
 */
#define get_cabac_bypass get_cabac_bypass_loongarch
static av_always_inline int get_cabac_bypass_loongarch(CABACContext *c)
{
    int64_t tmp0, tmp1, tmp2;
    /* must start at 0: the asm keeps this value when it selects "0" */
    int res = 0;
    __asm__ volatile(
        /* c_low <<= 1 */
        "slli.d %[c_low], %[c_low], 0x01 \n\t"
        /* refill only when (c_low & cabac_mask) == 0 */
        "and %[tmp0], %[c_low], %[cabac_mask] \n\t"
        "bnez %[tmp0], 1f \n\t"
        /* tmp1 = 16 bits read from c_bytestream */
        "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t"
#if UNCHECKED_BITSTREAM_READER
        /* c_bytestream += 2, no end-of-buffer check */
        "addi.d %[c_bytestream], %[c_bytestream], 0x02 \n\t"
#else
        /* c_bytestream += 2 only if c_bytestream < c_bytestream_end */
        "slt %[tmp0], %[c_bytestream], %[c_bytestream_end] \n\t"
        "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
        "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
#endif
        /* c_low += (byte-swapped tmp1 << 1) - cabac_mask */
        "revb.2h %[tmp1], %[tmp1] \n\t"
        "slli.d %[tmp1], %[tmp1], 0x01 \n\t"
        "sub.d %[tmp1], %[tmp1], %[cabac_mask] \n\t"
        "add.d %[c_low], %[c_low], %[tmp1] \n\t"
        "1: \n\t"
        /* tmp1 = c_range << 17; tmp0 = (c_low < tmp1) ? 1 : 0 */
        "slli.d %[tmp1], %[c_range], 0x11 \n\t"
        "slt %[tmp0], %[c_low], %[tmp1] \n\t"
        /* tmp1 = c_low - (c_range << 17) */
        "sub.d %[tmp1], %[c_low], %[tmp1] \n\t"
        /* branch-free select: res = tmp0 ? res : one */
        "masknez %[tmp2], %[one], %[tmp0] \n\t"
        "maskeqz %[res], %[res], %[tmp0] \n\t"
        "or %[res], %[res], %[tmp2] \n\t"
        /* branch-free select: c_low = tmp0 ? c_low : tmp1 */
        "masknez %[tmp2], %[tmp1], %[tmp0] \n\t"
        "maskeqz %[c_low], %[c_low], %[tmp0] \n\t"
        "or %[c_low], %[c_low], %[tmp2] \n\t"
    : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
      /* NOTE(review): c_range is declared read-write although no
       * instruction above writes it */
      [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
      [c_bytestream]"+&r"(c->bytestream), [res]"+&r"(res)
    : [cabac_mask]"r"(CABAC_MASK),
#if !UNCHECKED_BITSTREAM_READER
      [c_bytestream_end]"r"(c->bytestream_end),
#endif
      [one]"r"(0x01)
    : "memory"
    );
    return res;
}

/*
 * Decode one bypass symbol and use it to choose the sign of val.
 *
 * c->low is doubled, refilled from c->bytestream when
 * (c->low & CABAC_MASK) == 0, and then compared with c->range << 17.
 *
 * @param c   decoder context; c->low and c->bytestream are updated in place,
 *            c->range is only read by the instructions, and
 *            c->bytestream_end is read when UNCHECKED_BITSTREAM_READER is
 *            zero
 * @param val value whose sign is selected
 * @return -val if the refilled c->low is below c->range << 17; otherwise
 *         val, and c->range << 17 is subtracted from c->low
 */
#define get_cabac_bypass_sign get_cabac_bypass_sign_loongarch
static av_always_inline
int get_cabac_bypass_sign_loongarch(CABACContext *c, int val)
{
    int64_t tmp0, tmp1;
    /* res enters the asm holding val and leaves holding val or -val */
    int res = val;
    __asm__ volatile(
        /* c_low <<= 1 */
        "slli.d %[c_low], %[c_low], 0x01 \n\t"
        /* refill only when (c_low & cabac_mask) == 0 */
        "and %[tmp0], %[c_low], %[cabac_mask] \n\t"
        "bnez %[tmp0], 1f \n\t"
        /* tmp1 = 16 bits read from c_bytestream */
        "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t"
#if UNCHECKED_BITSTREAM_READER
        /* c_bytestream += 2, no end-of-buffer check */
        "addi.d %[c_bytestream], %[c_bytestream], 0x02 \n\t"
#else
        /* c_bytestream += 2 only if c_bytestream < c_bytestream_end */
        "slt %[tmp0], %[c_bytestream], %[c_bytestream_end] \n\t"
        "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
        "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
#endif
        /* c_low += (byte-swapped tmp1 << 1) - cabac_mask */
        "revb.2h %[tmp1], %[tmp1] \n\t"
        "slli.d %[tmp1], %[tmp1], 0x01 \n\t"
        "sub.d %[tmp1], %[tmp1], %[cabac_mask] \n\t"
        "add.d %[c_low], %[c_low], %[tmp1] \n\t"
        "1: \n\t"
        /* tmp1 = c_range << 17; tmp0 = (c_low < tmp1) ? 1 : 0 */
        "slli.d %[tmp1], %[c_range], 0x11 \n\t"
        "slt %[tmp0], %[c_low], %[tmp1] \n\t"
        /* tmp1 = c_low - (c_range << 17) */
        "sub.d %[tmp1], %[c_low], %[tmp1] \n\t"
        /* branch-free select: c_low = tmp0 ? c_low : tmp1 */
        "masknez %[tmp1], %[tmp1], %[tmp0] \n\t"
        "maskeqz %[c_low], %[c_low], %[tmp0] \n\t"
        "or %[c_low], %[c_low], %[tmp1] \n\t"
        /* tmp1 = 0 - res, then branch-free select: res = tmp0 ? tmp1 : res */
        "sub.d %[tmp1], %[zero], %[res] \n\t"
        "maskeqz %[tmp1], %[tmp1], %[tmp0] \n\t"
        "masknez %[res], %[res], %[tmp0] \n\t"
        "or %[res], %[res], %[tmp1] \n\t"
    : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [res]"+&r"(res),
      /* NOTE(review): c_range is declared read-write although no
       * instruction above writes it */
      [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
      [c_bytestream]"+&r"(c->bytestream)
    : [cabac_mask]"r"(CABAC_MASK),
#if !UNCHECKED_BITSTREAM_READER
      [c_bytestream_end]"r"(c->bytestream_end),
#endif
      [zero]"r"(0x0)
    : "memory"
    );

    return res;
}
#endif /* AVCODEC_LOONGARCH_CABAC_H */